public inbox for passt-dev@passt.top
 help / color / mirror / code / Atom feed
* [PATCH] conf: Bind inbound ports with CAP_NET_BIND_SERVICE before isolate_user()
@ 2022-10-13 16:34 Stefano Brivio
  2022-10-14  3:12 ` David Gibson
  0 siblings, 1 reply; 4+ messages in thread
From: Stefano Brivio @ 2022-10-13 16:34 UTC (permalink / raw)
  To: passt-dev; +Cc: David Gibson

Even if CAP_NET_BIND_SERVICE is granted, we'll lose the capability in
the target user namespace as we isolate the process, which means
we're unable to bind to low ports at that point.

Bind inbound ports, and only those, before isolate_user(). Keep the
handling of outbound ports (for pasta mode only) after the setup of
the namespace, because that's where we'll bind them.

To this end, initialise the netlink socket for the init namespace
before isolate_user() as well, as we actually need to know the
addresses of the upstream interface before binding ports, in case
they're not explicitly passed by the user.

As we now call nl_sock_init() twice, checking its return code from
conf() twice looks a bit heavy: make it exit(), instead, as we
can't do much if we don't have netlink sockets.

While at it:

- move the v4_only && v6_only options check just after the first
  option processing loop, as this is more strictly related to
  option parsing proper

- update the man page, explaining that CAP_NET_BIND_SERVICE is
  *not* the preferred way to bind ports, because passt and pasta
  can be abused to allow other processes to make effective usage
  of it. Add a note about the recommended sysctl instead

Reported-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
---
This is based on David's patchset "Fixes and cleanups for capability
handling".

 conf.c    | 71 +++++++++++++++++++++++++++----------------------------
 netlink.c | 20 +++++++++-------
 netlink.h |  2 +-
 passt.1   | 45 +++++++++++++++++++++++++++++------
 tap.c     |  1 +
 5 files changed, 87 insertions(+), 52 deletions(-)

diff --git a/conf.c b/conf.c
index a22ef48..e1f42f1 100644
--- a/conf.c
+++ b/conf.c
@@ -1530,6 +1530,11 @@ void conf(struct ctx *c, int argc, char **argv)
 		}
 	} while (name != -1);
 
+	if (v4_only && v6_only) {
+		err("Options ipv4-only and ipv6-only are mutually exclusive");
+		usage(argv[0]);
+	}
+
 	ret = conf_ugid(runas, &uid, &gid);
 	if (ret)
 		usage(argv[0]);
@@ -1539,6 +1544,30 @@ void conf(struct ctx *c, int argc, char **argv)
 			     logfile, logsize);
 	}
 
+	nl_sock_init(c, false);
+	if (!v6_only)
+		c->ifi4 = conf_ip4(ifi, &c->ip4, c->mac);
+	if (!v4_only)
+		c->ifi6 = conf_ip6(ifi, &c->ip6, c->mac);
+	if (!c->ifi4 && !c->ifi6) {
+		err("External interface not usable");
+		exit(EXIT_FAILURE);
+	}
+
+	/* Inbound port options can be parsed now (after IPv4/IPv6 settings) */
+	optind = 1;
+	do {
+		struct port_fwd *fwd;
+
+		name = getopt_long(argc, argv, optstring, options, NULL);
+
+		if ((name == 't' && (fwd = &c->tcp.fwd_in)) ||
+		    (name == 'u' && (fwd = &c->udp.fwd_in.f))) {
+			if (!optarg || conf_ports(c, name, optarg, fwd))
+				usage(argv[0]);
+		}
+	} while (name != -1);
+
 	if (c->mode == MODE_PASTA) {
 		if (conf_pasta_ns(&netns_only, userns, netns,
 				  optind, argc, argv) < 0)
@@ -1561,50 +1590,20 @@ void conf(struct ctx *c, int argc, char **argv)
 		}
 	}
 
-	if (nl_sock_init(c)) {
-		err("Failed to get netlink socket");
-		exit(EXIT_FAILURE);
-	}
-
-	if (v4_only && v6_only) {
-		err("Options ipv4-only and ipv6-only are mutually exclusive");
-		usage(argv[0]);
-	}
-	if (!v6_only)
-		c->ifi4 = conf_ip4(ifi, &c->ip4, c->mac);
-	if (!v4_only)
-		c->ifi6 = conf_ip6(ifi, &c->ip6, c->mac);
-	if (!c->ifi4 && !c->ifi6) {
-		err("External interface not usable");
-		exit(EXIT_FAILURE);
-	}
+	if (c->mode == MODE_PASTA)
+		nl_sock_init(c, true);
 
-	/* Now we can process port configuration options */
+	/* ...and outbound port options now that namespaces are set up. */
 	optind = 1;
 	do {
-		struct port_fwd *fwd = NULL;
+		struct port_fwd *fwd;
 
 		name = getopt_long(argc, argv, optstring, options, NULL);
-		switch (name) {
-		case 't':
-		case 'u':
-		case 'T':
-		case 'U':
-			if (name == 't')
-				fwd = &c->tcp.fwd_in;
-			else if (name == 'T')
-				fwd = &c->tcp.fwd_out;
-			else if (name == 'u')
-				fwd = &c->udp.fwd_in.f;
-			else if (name == 'U')
-				fwd = &c->udp.fwd_out.f;
 
+		if ((name == 'T' && (fwd = &c->tcp.fwd_out)) ||
+		    (name == 'U' && (fwd = &c->udp.fwd_out.f))) {
 			if (!optarg || conf_ports(c, name, optarg, fwd))
 				usage(argv[0]);
-
-			break;
-		default:
-			break;
 		}
 	} while (name != -1);
 
diff --git a/netlink.c b/netlink.c
index 6e5a96b..3ee4d42 100644
--- a/netlink.c
+++ b/netlink.c
@@ -19,6 +19,7 @@
 #include <sys/types.h>
 #include <limits.h>
 #include <stdlib.h>
+#include <stdbool.h>
 #include <stdint.h>
 #include <unistd.h>
 #include <arpa/inet.h>
@@ -71,25 +72,28 @@ ns:
 }
 
 /**
- * nl_sock_init() - Call nl_sock_init_do() and check for failures
+ * nl_sock_init() - Call nl_sock_init_do(), won't return on failure
  * @c:		Execution context
- *
- * Return: -EIO if sockets couldn't be set up, 0 otherwise
+ * @ns:		Get socket in namespace, not in init
  */
-int nl_sock_init(const struct ctx *c)
+void nl_sock_init(const struct ctx *c, bool ns)
 {
-	if (c->mode == MODE_PASTA) {
+	if (c->mode == MODE_PASTA && ns) {
 		NS_CALL(nl_sock_init_do, c);
 		if (nl_sock_ns == -1)
-			return -EIO;
+			goto fail;
 	} else {
 		nl_sock_init_do(NULL);
 	}
 
 	if (nl_sock == -1)
-		return -EIO;
+		goto fail;
 
-	return 0;
+	return;
+
+fail:
+	err("Failed to get netlink socket");
+	exit(EXIT_FAILURE);
 }
 
 /**
diff --git a/netlink.h b/netlink.h
index 5ce5037..3c991cc 100644
--- a/netlink.h
+++ b/netlink.h
@@ -6,7 +6,7 @@
 #ifndef NETLINK_H
 #define NETLINK_H
 
-int nl_sock_init(const struct ctx *c);
+void nl_sock_init(const struct ctx *c, bool ns);
 unsigned int nl_get_ext_if(sa_family_t af);
 void nl_route(int ns, unsigned int ifi, sa_family_t af, void *gw);
 void nl_addr(int ns, unsigned int ifi, sa_family_t af,
diff --git a/passt.1 b/passt.1
index 7d113f2..2cdba5d 100644
--- a/passt.1
+++ b/passt.1
@@ -771,14 +771,45 @@ possible to bind sockets to foreign addresses.
 
 .SS Binding to low numbered ports (well-known or system ports, up to 1023)
 
-If the port forwarding configuration requires binding to port numbers lower than
-1024, \fBpasst\fR and \fBpasta\fR will try to bind to them, but will fail if not
-running as root, or without the \fICAP_NET_BIND_SERVICE\fR Linux capability, see
-\fBservices\fR(5) and \fBcapabilities\fR(7). To grant the
-\fICAP_NET_BIND_SERVICE\fR capability to passt, you can issue, as root:
+If the port forwarding configuration requires binding to ports with numbers
+lower than 1024, \fBpasst\fR and \fBpasta\fR will try to bind to them, but will
+fail, unless, either:
+
+.IP \(bu 2
+the \fIsys.net.ipv4.ip_unprivileged_port_start\fR sysctl is set to the number
+of the lowest port \fBpasst\fR and \fBpasta\fR need. For example, as root:
+
+.nf
+	sysctl -w net.ipv4.ip_unprivileged_port_start=443
+.fi
+
+\fBNote\fR: this is the recommended way of enabling \fBpasst\fR and \fBpasta\fR
+to bind to ports with numbers below 1024.
+
+.IP \(bu
+or the \fICAP_NET_BIND_SERVICE\fR Linux capability is granted, see
+\fBservices\fR(5) and \fBcapabilities\fR(7).
+
+This is, in general, \fBnot the recommended way\fR, because \fBpasst\fR and
+\fBpasta\fR might be used as vector to effectively use this capability from
+another process.
+
+However, if your environment is sufficiently controlled by an LSM (Linux
+Security Module) such as \fIAppArmor\fR, \fISELinux\fR, \fISmack\fR or
+\fITOMOYO\fR, and no other processes can interact in such a way in virtue of
+this, granting this capability to \fBpasst\fR and \fBpasta\fR only can
+effectively prevent other processes from utilising it.
+
+Note that this will not work for automatic detection and forwarding of ports
+with \fBpasta\fR, because \fBpasta\fR will relinquish this capability at
+runtime.
+
+To grant this capability, you can issue, as root:
+
+.nf
+	setcap 'cap_net_bind_service=+ep' $(which passt)
+.fi
 
-.RS
-setcap 'cap_net_bind_service=+ep' $(which passt)
 .RE
 
 .SS ICMP/ICMPv6 Echo sockets
diff --git a/tap.c b/tap.c
index 77e513c..8b6d9bc 100644
--- a/tap.c
+++ b/tap.c
@@ -30,6 +30,7 @@
 #include <sys/stat.h>
 #include <fcntl.h>
 #include <sys/uio.h>
+#include <stdbool.h>
 #include <stdlib.h>
 #include <unistd.h>
 #include <netinet/ip.h>
-- 
@@ -30,6 +30,7 @@
 #include <sys/stat.h>
 #include <fcntl.h>
 #include <sys/uio.h>
+#include <stdbool.h>
 #include <stdlib.h>
 #include <unistd.h>
 #include <netinet/ip.h>
-- 
2.35.1


^ permalink raw reply related	[flat|nested] 4+ messages in thread

* Re: [PATCH] conf: Bind inbound ports with CAP_NET_BIND_SERVICE before isolate_user()
  2022-10-13 16:34 [PATCH] conf: Bind inbound ports with CAP_NET_BIND_SERVICE before isolate_user() Stefano Brivio
@ 2022-10-14  3:12 ` David Gibson
  2022-10-14  3:20   ` David Gibson
  0 siblings, 1 reply; 4+ messages in thread
From: David Gibson @ 2022-10-14  3:12 UTC (permalink / raw)
  To: Stefano Brivio; +Cc: passt-dev

[-- Attachment #1: Type: text/plain, Size: 9527 bytes --]

On Thu, Oct 13, 2022 at 06:34:06PM +0200, Stefano Brivio wrote:
> Even if CAP_NET_BIND_SERVICE is granted, we'll lose the capability in
> the target user namespace as we isolate the process, which means
> we're unable to bind to low ports at that point.
> 
> Bind inbound ports, and only those, before isolate_user(). Keep the
> handling of outbound ports (for pasta mode only) after the setup of
> the namespace, because that's where we'll bind them.
> 
> To this end, initialise the netlink socket for the init namespace
> before isolate_user() as well, as we actually need to know the
> addresses of the upstream interface before binding ports, in case
> they're not explicitly passed by the user.
> 
> As we now call nl_sock_init() twice, checking its return code from
> conf() twice looks a bit heavy: make it exit(), instead, as we
> can't do much if we don't have netlink sockets.
> 
> While at it:
> 
> - move the v4_only && v6_only options check just after the first
>   option processing loop, as this is more strictly related to
>   option parsing proper
> 
> - update the man page, explaining that CAP_NET_BIND_SERVICE is
>   *not* the preferred way to bind ports, because passt and pasta
>   can be abused to allow other processes to make effective usage
>   of it. Add a note about the recommended sysctl instead
> 
> Reported-by: David Gibson <david@gibson.dropbear.id.au>
> Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
> ---
> This is based on David's patchset "Fixes and cleanups for capability
> handling".
> 
>  conf.c    | 71 +++++++++++++++++++++++++++----------------------------
>  netlink.c | 20 +++++++++-------
>  netlink.h |  2 +-
>  passt.1   | 45 +++++++++++++++++++++++++++++------
>  tap.c     |  1 +
>  5 files changed, 87 insertions(+), 52 deletions(-)
> 
> diff --git a/conf.c b/conf.c
> index a22ef48..e1f42f1 100644
> --- a/conf.c
> +++ b/conf.c
> @@ -1530,6 +1530,11 @@ void conf(struct ctx *c, int argc, char **argv)
>  		}
>  	} while (name != -1);
>  
> +	if (v4_only && v6_only) {
> +		err("Options ipv4-only and ipv6-only are mutually exclusive");
> +		usage(argv[0]);
> +	}
> +
>  	ret = conf_ugid(runas, &uid, &gid);
>  	if (ret)
>  		usage(argv[0]);
> @@ -1539,6 +1544,30 @@ void conf(struct ctx *c, int argc, char **argv)
>  			     logfile, logsize);
>  	}
>  
> +	nl_sock_init(c, false);
> +	if (!v6_only)
> +		c->ifi4 = conf_ip4(ifi, &c->ip4, c->mac);
> +	if (!v4_only)
> +		c->ifi6 = conf_ip6(ifi, &c->ip6, c->mac);
> +	if (!c->ifi4 && !c->ifi6) {
> +		err("External interface not usable");
> +		exit(EXIT_FAILURE);
> +	}
> +
> +	/* Inbound port options can be parsed now (after IPv4/IPv6 settings) */
> +	optind = 1;
> +	do {
> +		struct port_fwd *fwd;
> +
> +		name = getopt_long(argc, argv, optstring, options, NULL);
> +
> +		if ((name == 't' && (fwd = &c->tcp.fwd_in)) ||
> +		    (name == 'u' && (fwd = &c->udp.fwd_in.f))) {
> +			if (!optarg || conf_ports(c, name, optarg, fwd))
> +				usage(argv[0]);
> +		}
> +	} while (name != -1);
> +
>  	if (c->mode == MODE_PASTA) {
>  		if (conf_pasta_ns(&netns_only, userns, netns,
>  				  optind, argc, argv) < 0)
> @@ -1561,50 +1590,20 @@ void conf(struct ctx *c, int argc, char **argv)
>  		}
>  	}
>  
> -	if (nl_sock_init(c)) {
> -		err("Failed to get netlink socket");
> -		exit(EXIT_FAILURE);
> -	}
> -
> -	if (v4_only && v6_only) {
> -		err("Options ipv4-only and ipv6-only are mutually exclusive");
> -		usage(argv[0]);
> -	}
> -	if (!v6_only)
> -		c->ifi4 = conf_ip4(ifi, &c->ip4, c->mac);
> -	if (!v4_only)
> -		c->ifi6 = conf_ip6(ifi, &c->ip6, c->mac);
> -	if (!c->ifi4 && !c->ifi6) {
> -		err("External interface not usable");
> -		exit(EXIT_FAILURE);
> -	}
> +	if (c->mode == MODE_PASTA)
> +		nl_sock_init(c, true);
>  
> -	/* Now we can process port configuration options */
> +	/* ...and outbound port options now that namespaces are set up. */
>  	optind = 1;
>  	do {
> -		struct port_fwd *fwd = NULL;
> +		struct port_fwd *fwd;
>  
>  		name = getopt_long(argc, argv, optstring, options, NULL);
> -		switch (name) {
> -		case 't':
> -		case 'u':
> -		case 'T':
> -		case 'U':
> -			if (name == 't')
> -				fwd = &c->tcp.fwd_in;
> -			else if (name == 'T')
> -				fwd = &c->tcp.fwd_out;
> -			else if (name == 'u')
> -				fwd = &c->udp.fwd_in.f;
> -			else if (name == 'U')
> -				fwd = &c->udp.fwd_out.f;
>  
> +		if ((name == 'T' && (fwd = &c->tcp.fwd_out)) ||
> +		    (name == 'U' && (fwd = &c->udp.fwd_out.f))) {
>  			if (!optarg || conf_ports(c, name, optarg, fwd))
>  				usage(argv[0]);
> -
> -			break;
> -		default:
> -			break;
>  		}
>  	} while (name != -1);
>  
> diff --git a/netlink.c b/netlink.c
> index 6e5a96b..3ee4d42 100644
> --- a/netlink.c
> +++ b/netlink.c
> @@ -19,6 +19,7 @@
>  #include <sys/types.h>
>  #include <limits.h>
>  #include <stdlib.h>
> +#include <stdbool.h>
>  #include <stdint.h>
>  #include <unistd.h>
>  #include <arpa/inet.h>
> @@ -71,25 +72,28 @@ ns:
>  }
>  
>  /**
> - * nl_sock_init() - Call nl_sock_init_do() and check for failures
> + * nl_sock_init() - Call nl_sock_init_do(), won't return on failure
>   * @c:		Execution context
> - *
> - * Return: -EIO if sockets couldn't be set up, 0 otherwise
> + * @ns:		Get socket in namespace, not in init
>   */
> -int nl_sock_init(const struct ctx *c)
> +void nl_sock_init(const struct ctx *c, bool ns)
>  {
> -	if (c->mode == MODE_PASTA) {
> +	if (c->mode == MODE_PASTA && ns) {

No need for the mode check here: ns is only ever true when in pasta
mode.  Since you're also calling nl_sock_init() twice explicitly, the
nasty goto in nl_sock_init_do() isn't really needed any more, and we
could make it just get one socket per call.  But maybe that cleanup's
not in scope for this patch.

>  		NS_CALL(nl_sock_init_do, c);
>  		if (nl_sock_ns == -1)
> -			return -EIO;
> +			goto fail;
>  	} else {
>  		nl_sock_init_do(NULL);
>  	}
>  
>  	if (nl_sock == -1)
> -		return -EIO;
> +		goto fail;
>  
> -	return 0;
> +	return;
> +
> +fail:
> +	err("Failed to get netlink socket");
> +	exit(EXIT_FAILURE);
>  }
>  
>  /**
> diff --git a/netlink.h b/netlink.h
> index 5ce5037..3c991cc 100644
> --- a/netlink.h
> +++ b/netlink.h
> @@ -6,7 +6,7 @@
>  #ifndef NETLINK_H
>  #define NETLINK_H
>  
> -int nl_sock_init(const struct ctx *c);
> +void nl_sock_init(const struct ctx *c, bool ns);
>  unsigned int nl_get_ext_if(sa_family_t af);
>  void nl_route(int ns, unsigned int ifi, sa_family_t af, void *gw);
>  void nl_addr(int ns, unsigned int ifi, sa_family_t af,
> diff --git a/passt.1 b/passt.1
> index 7d113f2..2cdba5d 100644
> --- a/passt.1
> +++ b/passt.1
> @@ -771,14 +771,45 @@ possible to bind sockets to foreign addresses.
>  
>  .SS Binding to low numbered ports (well-known or system ports, up to 1023)
>  
> -If the port forwarding configuration requires binding to port numbers lower than
> -1024, \fBpasst\fR and \fBpasta\fR will try to bind to them, but will fail if not
> -running as root, or without the \fICAP_NET_BIND_SERVICE\fR Linux capability, see
> -\fBservices\fR(5) and \fBcapabilities\fR(7). To grant the
> -\fICAP_NET_BIND_SERVICE\fR capability to passt, you can issue, as root:
> +If the port forwarding configuration requires binding to ports with numbers
> +lower than 1024, \fBpasst\fR and \fBpasta\fR will try to bind to them, but will
> +fail, unless, either:
> +
> +.IP \(bu 2
> +the \fIsys.net.ipv4.ip_unprivileged_port_start\fR sysctl is set to the number
> +of the lowest port \fBpasst\fR and \fBpasta\fR need. For example, as root:
> +
> +.nf
> +	sysctl -w net.ipv4.ip_unprivileged_port_start=443
> +.fi
> +
> +\fBNote\fR: this is the recommended way of enabling \fBpasst\fR and \fBpasta\fR
> +to bind to ports with numbers below 1024.
> +
> +.IP \(bu
> +or the \fICAP_NET_BIND_SERVICE\fR Linux capability is granted, see
> +\fBservices\fR(5) and \fBcapabilities\fR(7).
> +
> +This is, in general, \fBnot the recommended way\fR, because \fBpasst\fR and
> +\fBpasta\fR might be used as vector to effectively use this capability from
> +another process.
> +
> +However, if your environment is sufficiently controlled by an LSM (Linux
> +Security Module) such as \fIAppArmor\fR, \fISELinux\fR, \fISmack\fR or
> +\fITOMOYO\fR, and no other processes can interact in such a way in virtue of
> +this, granting this capability to \fBpasst\fR and \fBpasta\fR only can
> +effectively prevent other processes from utilising it.
> +
> +Note that this will not work for automatic detection and forwarding of ports
> +with \fBpasta\fR, because \fBpasta\fR will relinquish this capability at
> +runtime.
> +
> +To grant this capability, you can issue, as root:
> +
> +.nf
> +	setcap 'cap_net_bind_service=+ep' $(which passt)
> +.fi
>  
> -.RS
> -setcap 'cap_net_bind_service=+ep' $(which passt)
>  .RE
>  
>  .SS ICMP/ICMPv6 Echo sockets
> diff --git a/tap.c b/tap.c
> index 77e513c..8b6d9bc 100644
> --- a/tap.c
> +++ b/tap.c
> @@ -30,6 +30,7 @@
>  #include <sys/stat.h>
>  #include <fcntl.h>
>  #include <sys/uio.h>
> +#include <stdbool.h>
>  #include <stdlib.h>
>  #include <unistd.h>
>  #include <netinet/ip.h>

-- 
David Gibson			| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you.  NOT _the_ _other_
				| _way_ _around_!
http://www.ozlabs.org/~dgibson

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH] conf: Bind inbound ports with CAP_NET_BIND_SERVICE before isolate_user()
  2022-10-14  3:12 ` David Gibson
@ 2022-10-14  3:20   ` David Gibson
  2022-10-14  6:38     ` Stefano Brivio
  0 siblings, 1 reply; 4+ messages in thread
From: David Gibson @ 2022-10-14  3:20 UTC (permalink / raw)
  To: Stefano Brivio; +Cc: passt-dev

[-- Attachment #1: Type: text/plain, Size: 4119 bytes --]

On Fri, Oct 14, 2022 at 02:12:26PM +1100, David Gibson wrote:
> On Thu, Oct 13, 2022 at 06:34:06PM +0200, Stefano Brivio wrote:
> > Even if CAP_NET_BIND_SERVICE is granted, we'll lose the capability in
> > the target user namespace as we isolate the process, which means
> > we're unable to bind to low ports at that point.
> > 
> > Bind inbound ports, and only those, before isolate_user(). Keep the
> > handling of outbound ports (for pasta mode only) after the setup of
> > the namespace, because that's where we'll bind them.
> > 
> > To this end, initialise the netlink socket for the init namespace
> > before isolate_user() as well, as we actually need to know the
> > addresses of the upstream interface before binding ports, in case
> > they're not explicitly passed by the user.
> > 
> > As we now call nl_sock_init() twice, checking its return code from
> > conf() twice looks a bit heavy: make it exit(), instead, as we
> > can't do much if we don't have netlink sockets.
> > 
> > While at it:
> > 
> > - move the v4_only && v6_only options check just after the first
> >   option processing loop, as this is more strictly related to
> >   option parsing proper
> > 
> > - update the man page, explaining that CAP_NET_BIND_SERVICE is
> >   *not* the preferred way to bind ports, because passt and pasta
> >   can be abused to allow other processes to make effective usage
> >   of it. Add a note about the recommended sysctl instead
> > 
> > Reported-by: David Gibson <david@gibson.dropbear.id.au>
> > Signed-off-by: Stefano Brivio <sbrivio@redhat.com>

Sorry, sent the previous reply before I'd finished.

> > -If the port forwarding configuration requires binding to port numbers lower than
> > -1024, \fBpasst\fR and \fBpasta\fR will try to bind to them, but will fail if not
> > -running as root, or without the \fICAP_NET_BIND_SERVICE\fR Linux capability, see
> > -\fBservices\fR(5) and \fBcapabilities\fR(7). To grant the
> > -\fICAP_NET_BIND_SERVICE\fR capability to passt, you can issue, as root:
> > +If the port forwarding configuration requires binding to ports with numbers
> > +lower than 1024, \fBpasst\fR and \fBpasta\fR will try to bind to them, but will
> > +fail, unless, either:
> > +
> > +.IP \(bu 2
> > +the \fIsys.net.ipv4.ip_unprivileged_port_start\fR sysctl is set to the number
> > +of the lowest port \fBpasst\fR and \fBpasta\fR need. For example, as root:
> > +
> > +.nf
> > +	sysctl -w net.ipv4.ip_unprivileged_port_start=443
> > +.fi
> > +
> > +\fBNote\fR: this is the recommended way of enabling \fBpasst\fR and \fBpasta\fR
> > +to bind to ports with numbers below 1024.
> > +
> > +.IP \(bu
> > +or the \fICAP_NET_BIND_SERVICE\fR Linux capability is granted, see
> > +\fBservices\fR(5) and \fBcapabilities\fR(7).
> > +
> > +This is, in general, \fBnot the recommended way\fR, because \fBpasst\fR and
> > +\fBpasta\fR might be used as vector to effectively use this capability from
> > +another process.
> > +
> > +However, if your environment is sufficiently controlled by an LSM (Linux
> > +Security Module) such as \fIAppArmor\fR, \fISELinux\fR, \fISmack\fR or
> > +\fITOMOYO\fR, and no other processes can interact in such a way in virtue of
> > +this, granting this capability to \fBpasst\fR and \fBpasta\fR only can
> > +effectively prevent other processes from utilising it.
> > +
> > +Note that this will not work for automatic detection and forwarding of ports
> > +with \fBpasta\fR, because \fBpasta\fR will relinquish this capability at
> > +runtime.
> > +
> > +To grant this capability, you can issue, as root:
> > +
> > +.nf
> > +	setcap 'cap_net_bind_service=+ep' $(which passt)
> > +.fi
> >  
> > -.RS
> > -setcap 'cap_net_bind_service=+ep' $(which passt)
> >  .RE

These likely won't be enough, since for most users the caps on the
passt.avx2 binary are the ones that will matter.

-- 
David Gibson			| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you.  NOT _the_ _other_
				| _way_ _around_!
http://www.ozlabs.org/~dgibson

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH] conf: Bind inbound ports with CAP_NET_BIND_SERVICE before isolate_user()
  2022-10-14  3:20   ` David Gibson
@ 2022-10-14  6:38     ` Stefano Brivio
  0 siblings, 0 replies; 4+ messages in thread
From: Stefano Brivio @ 2022-10-14  6:38 UTC (permalink / raw)
  To: David Gibson; +Cc: passt-dev

On Fri, 14 Oct 2022 14:12:26 +1100
David Gibson <david@gibson.dropbear.id.au> wrote:

> On Thu, Oct 13, 2022 at 06:34:06PM +0200, Stefano Brivio wrote:
> > Even if CAP_NET_BIND_SERVICE is granted, we'll lose the capability in
> > the target user namespace as we isolate the process, which means
> > we're unable to bind to low ports at that point.
> > 
> > Bind inbound ports, and only those, before isolate_user(). Keep the
> > handling of outbound ports (for pasta mode only) after the setup of
> > the namespace, because that's where we'll bind them.
> > 
> > To this end, initialise the netlink socket for the init namespace
> > before isolate_user() as well, as we actually need to know the
> > addresses of the upstream interface before binding ports, in case
> > they're not explicitly passed by the user.
> > 
> > As we now call nl_sock_init() twice, checking its return code from
> > conf() twice looks a bit heavy: make it exit(), instead, as we
> > can't do much if we don't have netlink sockets.
> > 
> > While at it:
> > 
> > - move the v4_only && v6_only options check just after the first
> >   option processing loop, as this is more strictly related to
> >   option parsing proper
> > 
> > - update the man page, explaining that CAP_NET_BIND_SERVICE is
> >   *not* the preferred way to bind ports, because passt and pasta
> >   can be abused to allow other processes to make effective usage
> >   of it. Add a note about the recommended sysctl instead
> > 
> > Reported-by: David Gibson <david@gibson.dropbear.id.au>
> > Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
> > ---
> > This is based on David's patchset "Fixes and cleanups for capability
> > handling".
> > 
> >  conf.c    | 71 +++++++++++++++++++++++++++----------------------------
> >  netlink.c | 20 +++++++++-------
> >  netlink.h |  2 +-
> >  passt.1   | 45 +++++++++++++++++++++++++++++------
> >  tap.c     |  1 +
> >  5 files changed, 87 insertions(+), 52 deletions(-)
> > 
> > diff --git a/conf.c b/conf.c
> > index a22ef48..e1f42f1 100644
> > --- a/conf.c
> > +++ b/conf.c
> > @@ -1530,6 +1530,11 @@ void conf(struct ctx *c, int argc, char **argv)
> >  		}
> >  	} while (name != -1);
> >  
> > +	if (v4_only && v6_only) {
> > +		err("Options ipv4-only and ipv6-only are mutually exclusive");
> > +		usage(argv[0]);
> > +	}
> > +
> >  	ret = conf_ugid(runas, &uid, &gid);
> >  	if (ret)
> >  		usage(argv[0]);
> > @@ -1539,6 +1544,30 @@ void conf(struct ctx *c, int argc, char **argv)
> >  			     logfile, logsize);
> >  	}
> >  
> > +	nl_sock_init(c, false);
> > +	if (!v6_only)
> > +		c->ifi4 = conf_ip4(ifi, &c->ip4, c->mac);
> > +	if (!v4_only)
> > +		c->ifi6 = conf_ip6(ifi, &c->ip6, c->mac);
> > +	if (!c->ifi4 && !c->ifi6) {
> > +		err("External interface not usable");
> > +		exit(EXIT_FAILURE);
> > +	}
> > +
> > +	/* Inbound port options can be parsed now (after IPv4/IPv6 settings) */
> > +	optind = 1;
> > +	do {
> > +		struct port_fwd *fwd;
> > +
> > +		name = getopt_long(argc, argv, optstring, options, NULL);
> > +
> > +		if ((name == 't' && (fwd = &c->tcp.fwd_in)) ||
> > +		    (name == 'u' && (fwd = &c->udp.fwd_in.f))) {
> > +			if (!optarg || conf_ports(c, name, optarg, fwd))
> > +				usage(argv[0]);
> > +		}
> > +	} while (name != -1);
> > +
> >  	if (c->mode == MODE_PASTA) {
> >  		if (conf_pasta_ns(&netns_only, userns, netns,
> >  				  optind, argc, argv) < 0)
> > @@ -1561,50 +1590,20 @@ void conf(struct ctx *c, int argc, char **argv)
> >  		}
> >  	}
> >  
> > -	if (nl_sock_init(c)) {
> > -		err("Failed to get netlink socket");
> > -		exit(EXIT_FAILURE);
> > -	}
> > -
> > -	if (v4_only && v6_only) {
> > -		err("Options ipv4-only and ipv6-only are mutually exclusive");
> > -		usage(argv[0]);
> > -	}
> > -	if (!v6_only)
> > -		c->ifi4 = conf_ip4(ifi, &c->ip4, c->mac);
> > -	if (!v4_only)
> > -		c->ifi6 = conf_ip6(ifi, &c->ip6, c->mac);
> > -	if (!c->ifi4 && !c->ifi6) {
> > -		err("External interface not usable");
> > -		exit(EXIT_FAILURE);
> > -	}
> > +	if (c->mode == MODE_PASTA)
> > +		nl_sock_init(c, true);
> >  
> > -	/* Now we can process port configuration options */
> > +	/* ...and outbound port options now that namespaces are set up. */
> >  	optind = 1;
> >  	do {
> > -		struct port_fwd *fwd = NULL;
> > +		struct port_fwd *fwd;
> >  
> >  		name = getopt_long(argc, argv, optstring, options, NULL);
> > -		switch (name) {
> > -		case 't':
> > -		case 'u':
> > -		case 'T':
> > -		case 'U':
> > -			if (name == 't')
> > -				fwd = &c->tcp.fwd_in;
> > -			else if (name == 'T')
> > -				fwd = &c->tcp.fwd_out;
> > -			else if (name == 'u')
> > -				fwd = &c->udp.fwd_in.f;
> > -			else if (name == 'U')
> > -				fwd = &c->udp.fwd_out.f;
> >  
> > +		if ((name == 'T' && (fwd = &c->tcp.fwd_out)) ||
> > +		    (name == 'U' && (fwd = &c->udp.fwd_out.f))) {
> >  			if (!optarg || conf_ports(c, name, optarg, fwd))
> >  				usage(argv[0]);
> > -
> > -			break;
> > -		default:
> > -			break;
> >  		}
> >  	} while (name != -1);
> >  
> > diff --git a/netlink.c b/netlink.c
> > index 6e5a96b..3ee4d42 100644
> > --- a/netlink.c
> > +++ b/netlink.c
> > @@ -19,6 +19,7 @@
> >  #include <sys/types.h>
> >  #include <limits.h>
> >  #include <stdlib.h>
> > +#include <stdbool.h>
> >  #include <stdint.h>
> >  #include <unistd.h>
> >  #include <arpa/inet.h>
> > @@ -71,25 +72,28 @@ ns:
> >  }
> >  
> >  /**
> > - * nl_sock_init() - Call nl_sock_init_do() and check for failures
> > + * nl_sock_init() - Call nl_sock_init_do(), won't return on failure
> >   * @c:		Execution context
> > - *
> > - * Return: -EIO if sockets couldn't be set up, 0 otherwise
> > + * @ns:		Get socket in namespace, not in init
> >   */
> > -int nl_sock_init(const struct ctx *c)
> > +void nl_sock_init(const struct ctx *c, bool ns)
> >  {
> > -	if (c->mode == MODE_PASTA) {
> > +	if (c->mode == MODE_PASTA && ns) {  
> 
> No need for the mode check here: ns is only ever true when in pasta
> mode.  Since you're also calling nl_sock_init() twice explicitly, the
> nasty goto in nl_sock_init_do() isn't really needed any more, and we
> could make it just get one socket per call.  But maybe that cleanup's
> not in scope for this patch.

Hmm, yeah, why not, changed in v2.

On Fri, 14 Oct 2022 14:20:44 +1100
David Gibson <david@gibson.dropbear.id.au> wrote:

> On Fri, Oct 14, 2022 at 02:12:26PM +1100, David Gibson wrote:
> > On Thu, Oct 13, 2022 at 06:34:06PM +0200, Stefano Brivio wrote:  
> > > Even if CAP_NET_BIND_SERVICE is granted, we'll lose the capability in
> > > the target user namespace as we isolate the process, which means
> > > we're unable to bind to low ports at that point.
> > > 
> > > Bind inbound ports, and only those, before isolate_user(). Keep the
> > > handling of outbound ports (for pasta mode only) after the setup of
> > > the namespace, because that's where we'll bind them.
> > > 
> > > To this end, initialise the netlink socket for the init namespace
> > > before isolate_user() as well, as we actually need to know the
> > > addresses of the upstream interface before binding ports, in case
> > > they're not explicitly passed by the user.
> > > 
> > > As we now call nl_sock_init() twice, checking its return code from
> > > conf() twice looks a bit heavy: make it exit(), instead, as we
> > > can't do much if we don't have netlink sockets.
> > > 
> > > While at it:
> > > 
> > > - move the v4_only && v6_only options check just after the first
> > >   option processing loop, as this is more strictly related to
> > >   option parsing proper
> > > 
> > > - update the man page, explaining that CAP_NET_BIND_SERVICE is
> > >   *not* the preferred way to bind ports, because passt and pasta
> > >   can be abused to allow other processes to make effective usage
> > >   of it. Add a note about the recommended sysctl instead
> > > 
> > > Reported-by: David Gibson <david@gibson.dropbear.id.au>
> > > Signed-off-by: Stefano Brivio <sbrivio@redhat.com>  
> 
> Sorry, sent the previous reply before I'd finished.
> 
> > > -If the port forwarding configuration requires binding to port numbers lower than
> > > -1024, \fBpasst\fR and \fBpasta\fR will try to bind to them, but will fail if not
> > > -running as root, or without the \fICAP_NET_BIND_SERVICE\fR Linux capability, see
> > > -\fBservices\fR(5) and \fBcapabilities\fR(7). To grant the
> > > -\fICAP_NET_BIND_SERVICE\fR capability to passt, you can issue, as root:
> > > +If the port forwarding configuration requires binding to ports with numbers
> > > +lower than 1024, \fBpasst\fR and \fBpasta\fR will try to bind to them, but will
> > > +fail, unless, either:
> > > +
> > > +.IP \(bu 2
> > > +the \fIsys.net.ipv4.ip_unprivileged_port_start\fR sysctl is set to the number
> > > +of the lowest port \fBpasst\fR and \fBpasta\fR need. For example, as root:
> > > +
> > > +.nf
> > > +	sysctl -w net.ipv4.ip_unprivileged_port_start=443
> > > +.fi
> > > +
> > > +\fBNote\fR: this is the recommended way of enabling \fBpasst\fR and \fBpasta\fR
> > > +to bind to ports with numbers below 1024.
> > > +
> > > +.IP \(bu
> > > +or the \fICAP_NET_BIND_SERVICE\fR Linux capability is granted, see
> > > +\fBservices\fR(5) and \fBcapabilities\fR(7).
> > > +
> > > +This is, in general, \fBnot the recommended way\fR, because \fBpasst\fR and
> > > +\fBpasta\fR might be used as vector to effectively use this capability from
> > > +another process.
> > > +
> > > +However, if your environment is sufficiently controlled by an LSM (Linux
> > > +Security Module) such as \fIAppArmor\fR, \fISELinux\fR, \fISmack\fR or
> > > +\fITOMOYO\fR, and no other processes can interact in such a way in virtue of
> > > +this, granting this capability to \fBpasst\fR and \fBpasta\fR only can
> > > +effectively prevent other processes from utilising it.
> > > +
> > > +Note that this will not work for automatic detection and forwarding of ports
> > > +with \fBpasta\fR, because \fBpasta\fR will relinquish this capability at
> > > +runtime.
> > > +
> > > +To grant this capability, you can issue, as root:
> > > +
> > > +.nf
> > > +	setcap 'cap_net_bind_service=+ep' $(which passt)
> > > +.fi
> > >  
> > > -.RS
> > > -setcap 'cap_net_bind_service=+ep' $(which passt)
> > >  .RE  
> 
> These likely won't be enough, since for most users the caps on the
> passt.avx2 binary are the ones that will matter.

...I didn't want to make the man page arch-specific, but on the other
hand it's very likely somebody will struggle with this.

Replaced with a while loop on which(1) output, that's pretty much the
only way to keep stderr clean and the man page nicely displayed in
small terminals.

-- 
Stefano


^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2022-10-14  6:39 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-10-13 16:34 [PATCH] conf: Bind inbound ports with CAP_NET_BIND_SERVICE before isolate_user() Stefano Brivio
2022-10-14  3:12 ` David Gibson
2022-10-14  3:20   ` David Gibson
2022-10-14  6:38     ` Stefano Brivio

Code repositories for project(s) associated with this public inbox

	https://passt.top/passt

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for IMAP folder(s).