public inbox for passt-dev@passt.top
 help / color / mirror / code / Atom feed
* [PATCH v2 00/10] Clean up handling of userns
@ 2022-09-08  3:58 David Gibson
  2022-09-08  3:58 ` [PATCH v2 01/10] Don't store UID & GID persistently in the context structure David Gibson
                   ` (10 more replies)
  0 siblings, 11 replies; 33+ messages in thread
From: David Gibson @ 2022-09-08  3:58 UTC (permalink / raw)
  To: passt-dev

[-- Attachment #1: Type: text/plain, Size: 1732 bytes --]

Sorry for the resend, but I found a bug that means this will fail to
build on some distros / versions.

Our handling of user namespaces is more complex than it needs to be.
This simplifies the handling by identifying and entering (or creating)
the correct userns earlier, so that later code doesn't need to deal
with it any more.

Along the way we make a number of other cleanups to handling of userns
and setting our user and group.

This is based on my earlier test command dispatch and performance test
cleanup series.

Changes since v1:
 * Fixed overenthusiastic pruning of #includes when moving the
   self-isolation code which broke compile on some distro versions

David Gibson (10):
  Don't store UID & GID persistently in the context structure
  Split checking for root from dropping root privilege
  Consolidate determination of UID/GID to run as
  Safer handling if we can't open /proc/self/uid_map
  Move self-isolation code into a separate file
  Consolidate validation of pasta namespace options
  Clean up and rename conf_ns_open()
  Correctly handle --netns-only in pasta_start_ns()
  Handle userns isolation and dropping root at the same time
  Allow --userns when pasta spawns a command

 Makefile    |   8 +-
 conf.c      | 236 ++++++++++++++++++++++++++--------------------------
 isolation.c | 210 ++++++++++++++++++++++++++++++++++++++++++++++
 isolation.h |  15 ++++
 passt.1     |   5 +-
 passt.c     | 116 +-------------------------
 passt.h     |   9 --
 pasta.c     |  91 ++++++++++++--------
 pasta.h     |   1 +
 util.c      |  83 ------------------
 util.h      |   2 -
 11 files changed, 412 insertions(+), 364 deletions(-)
 create mode 100644 isolation.c
 create mode 100644 isolation.h

-- 
2.37.3


^ permalink raw reply	[flat|nested] 33+ messages in thread

* [PATCH v2 01/10] Don't store UID & GID persistently in the context structure
  2022-09-08  3:58 [PATCH v2 00/10] Clean up handling of userns David Gibson
@ 2022-09-08  3:58 ` David Gibson
  2022-09-08  3:58 ` [PATCH v2 02/10] Split checking for root from dropping root privilege David Gibson
                   ` (9 subsequent siblings)
  10 siblings, 0 replies; 33+ messages in thread
From: David Gibson @ 2022-09-08  3:58 UTC (permalink / raw)
  To: passt-dev

[-- Attachment #1: Type: text/plain, Size: 3803 bytes --]

c->uid and c->gid are first set in conf(), and last used in check_root()
itself called from conf().  Therefore these don't need to be fields in the
long lived context structure and can instead be locals in conf().

Signed-off-by: David Gibson <david(a)gibson.dropbear.id.au>
---
 conf.c  |  8 +++++---
 passt.h |  5 -----
 util.c  | 12 ++++++------
 util.h  |  2 +-
 4 files changed, 12 insertions(+), 15 deletions(-)

diff --git a/conf.c b/conf.c
index 2edb4ae..0fe5266 100644
--- a/conf.c
+++ b/conf.c
@@ -1086,6 +1086,8 @@ void conf(struct ctx *c, int argc, char **argv)
 	uint32_t *dns4 = c->ip4.dns;
 	int name, ret, mask, b, i;
 	unsigned int ifi = 0;
+	uid_t uid = 0;
+	gid_t gid = 0;
 
 	if (c->mode == MODE_PASTA)
 		c->no_dhcp_dns = c->no_dhcp_dns_search = 1;
@@ -1208,12 +1210,12 @@ void conf(struct ctx *c, int argc, char **argv)
 			c->trace = c->debug = c->foreground = 1;
 			break;
 		case 12:
-			if (c->uid || c->gid) {
+			if (uid || gid) {
 				err("Multiple --runas options given");
 				usage(argv[0]);
 			}
 
-			if (conf_runas(optarg, &c->uid, &c->gid)) {
+			if (conf_runas(optarg, &uid, &gid)) {
 				err("Invalid --runas option: %s", optarg);
 				usage(argv[0]);
 			}
@@ -1497,7 +1499,7 @@ void conf(struct ctx *c, int argc, char **argv)
 		}
 	} while (name != -1);
 
-	check_root(c);
+	check_root(&uid, &gid);
 
 	if (c->mode == MODE_PASTA) {
 		if (*netns && optind != argc) {
diff --git a/passt.h b/passt.h
index 347e7c1..3035430 100644
--- a/passt.h
+++ b/passt.h
@@ -144,8 +144,6 @@ struct ip6_ctx {
  * @sock_path:		Path for UNIX domain socket
  * @pcap:		Path for packet capture file
  * @pid_file:		Path to PID file, empty string if not configured
- * @uid:		UID we should drop to, if started as root
- * @gid:		GID we should drop to, if started as root
  * @pasta_netns_fd:	File descriptor for network namespace in pasta mode
  * @pasta_userns_fd:	Descriptor for user namespace to join, -1 once joined
  * @netns_only:		In pasta mode, don't join or create a user namespace
@@ -198,9 +196,6 @@ struct ctx {
 	char pcap[PATH_MAX];
 	char pid_file[PATH_MAX];
 
-	uid_t uid;
-	uid_t gid;
-
 	int pasta_netns_fd;
 	int pasta_userns_fd;
 	int netns_only;
diff --git a/util.c b/util.c
index 7e10deb..b2ccb3d 100644
--- a/util.c
+++ b/util.c
@@ -485,7 +485,7 @@ void drop_caps(void)
 /**
  * check_root() - Check if root in init ns, exit if we can't drop to user
  */
-void check_root(struct ctx *c)
+void check_root(uid_t *uid, gid_t *gid)
 {
 	const char root_uid_map[] = "         0          0 4294967295";
 	struct passwd *pw;
@@ -506,7 +506,7 @@ void check_root(struct ctx *c)
 
 	close(fd);
 
-	if (!c->uid) {
+	if (!*uid) {
 		fprintf(stderr, "Don't run as root. Changing to nobody...\n");
 #ifndef GLIBC_NO_STATIC_NSS
 		pw = getpwnam("nobody");
@@ -515,17 +515,17 @@ void check_root(struct ctx *c)
 			exit(EXIT_FAILURE);
 		}
 
-		c->uid = pw->pw_uid;
-		c->gid = pw->pw_gid;
+		*uid = pw->pw_uid;
+		*gid = pw->pw_gid;
 #else
 		(void)pw;
 
 		/* Common value for 'nobody', not really specified */
-		c->uid = c->gid = 65534;
+		*uid = *gid = 65534;
 #endif
 	}
 
-	if (!setgroups(0, NULL) && !setgid(c->gid) && !setuid(c->uid))
+	if (!setgroups(0, NULL) && !setgid(*gid) && !setuid(*uid))
 		return;
 
 	fprintf(stderr, "Can't change user/group, exiting");
diff --git a/util.h b/util.h
index 8297bec..58312fb 100644
--- a/util.h
+++ b/util.h
@@ -234,7 +234,7 @@ char *line_read(char *buf, size_t len, int fd);
 void procfs_scan_listen(struct ctx *c, uint8_t proto, int ip_version, int ns,
 			uint8_t *map, uint8_t *exclude);
 void drop_caps(void);
-void check_root(struct ctx *c);
+void check_root(uid_t *uid, gid_t *gid);
 int ns_enter(const struct ctx *c);
 void write_pidfile(int fd, pid_t pid);
 int __daemon(int pidfile_fd, int devnull_fd);
-- 
@@ -234,7 +234,7 @@ char *line_read(char *buf, size_t len, int fd);
 void procfs_scan_listen(struct ctx *c, uint8_t proto, int ip_version, int ns,
 			uint8_t *map, uint8_t *exclude);
 void drop_caps(void);
-void check_root(struct ctx *c);
+void check_root(uid_t *uid, gid_t *gid);
 int ns_enter(const struct ctx *c);
 void write_pidfile(int fd, pid_t pid);
 int __daemon(int pidfile_fd, int devnull_fd);
-- 
2.37.3


^ permalink raw reply related	[flat|nested] 33+ messages in thread

* [PATCH v2 02/10] Split checking for root from dropping root privilege
  2022-09-08  3:58 [PATCH v2 00/10] Clean up handling of userns David Gibson
  2022-09-08  3:58 ` [PATCH v2 01/10] Don't store UID & GID persistently in the context structure David Gibson
@ 2022-09-08  3:58 ` David Gibson
  2022-09-09 14:33   ` Stefano Brivio
  2022-09-08  3:59 ` [PATCH v2 03/10] Consolidate determination of UID/GID to run as David Gibson
                   ` (8 subsequent siblings)
  10 siblings, 1 reply; 33+ messages in thread
From: David Gibson @ 2022-09-08  3:58 UTC (permalink / raw)
  To: passt-dev

[-- Attachment #1: Type: text/plain, Size: 4294 bytes --]

check_root() both checks to see if we are root (in the init namespace),
and if we are drops to an unprivileged user.  To make future cleanups
simpler, split the checking for root (now in check_root()) from the actual
dropping of privilege (now in drop_root()).

Note that this does slightly alter semantics.  Previously we would only
setuid() if we were originally root (in the init namespace).  Now we will
always setuid() and setgid(), though it won't actually change anything if
we weren't privileged to begin with.  This also means that we will now
always attempt to switch to the user specified with --runas, even if we
aren't (init namespace) root to begin with.  Obviously this will fail with
an error if we weren't privileged to start with.  --help and the man page
are updated accordingly.

Signed-off-by: David Gibson <david(a)gibson.dropbear.id.au>
---
 conf.c  |  5 +++--
 passt.1 |  5 +++--
 util.c  | 27 ++++++++++++++++++++++++---
 util.h  |  1 +
 4 files changed, 31 insertions(+), 7 deletions(-)

diff --git a/conf.c b/conf.c
index 0fe5266..545f61d 100644
--- a/conf.c
+++ b/conf.c
@@ -747,8 +747,8 @@ static void usage(const char *name)
 	info(   "    default: run in background if started from a TTY");
 	info(   "  -e, --stderr		Log to stderr too");
 	info(   "    default: log to system logger only if started from a TTY");
-	info(   "  --runas UID|UID:GID 	Use given UID, GID if started as root");
-	info(   "    UID and GID can be numeric, or login and group names");
+	info(   "  --runas UID|UID:GID 	Run as given UID, GID, which can be");
+	info(   "    numeric, or login and group names");
 	info(   "    default: drop to user \"nobody\"");
 	info(   "  -h, --help		Display this help message and exit");
 
@@ -1500,6 +1500,7 @@ void conf(struct ctx *c, int argc, char **argv)
 	} while (name != -1);
 
 	check_root(&uid, &gid);
+	drop_root(uid, gid);
 
 	if (c->mode == MODE_PASTA) {
 		if (*netns && optind != argc) {
diff --git a/passt.1 b/passt.1
index 61f0e4c..0a1593b 100644
--- a/passt.1
+++ b/passt.1
@@ -104,9 +104,10 @@ terminal, and to both system logger and standard error otherwise.
 
 .TP
 .BR \-\-runas " " \fIUID\fR|\fIUID:GID\fR|\fILOGIN\fR|\fILOGIN:GROUP\fR
-If started as root, change to given UID and corresponding group if UID is given,
+Attempt to change to given UID and corresponding group if UID is given,
 or to given UID and given GID if both are given. Alternatively, login name, or
-login name and group name can be passed.
+login name and group name can be passed.  This requires privilege (either
+initial effective UID 0 or CAP_SETUID capability) to work.
 Default is to change to user \fInobody\fR if started as root.
 
 .TP
diff --git a/util.c b/util.c
index b2ccb3d..17595c1 100644
--- a/util.c
+++ b/util.c
@@ -492,7 +492,13 @@ void check_root(uid_t *uid, gid_t *gid)
 	char buf[BUFSIZ];
 	int fd;
 
-	if (getuid() && geteuid())
+	if (!*uid)
+		*uid = geteuid();
+
+	if (!*gid)
+		*gid = getegid();
+
+	if (*uid)
 		return;
 
 	if ((fd = open("/proc/self/uid_map", O_RDONLY | O_CLOEXEC)) < 0)
@@ -524,11 +530,26 @@ void check_root(uid_t *uid, gid_t *gid)
 		*uid = *gid = 65534;
 #endif
 	}
+}
+
+/**
+ * drop_root() - Switch to given UID and GID
+ */
+void drop_root(uid_t uid, gid_t gid)
+{
+	if (setgroups(0, NULL)) {
+		/* If we don't start with CAP_SETGID, this will EPERM */
+		if (errno != EPERM) {
+			err("Can't drop supplementary groups: %s",
+			    strerror(errno));
+			exit(EXIT_FAILURE);
+		}
+	}
 
-	if (!setgroups(0, NULL) && !setgid(*gid) && !setuid(*uid))
+	if (!setgid(gid) && !setuid(uid))
 		return;
 
-	fprintf(stderr, "Can't change user/group, exiting");
+	err("Can't change user/group, exiting");
 	exit(EXIT_FAILURE);
 }
 
diff --git a/util.h b/util.h
index 58312fb..e2f686b 100644
--- a/util.h
+++ b/util.h
@@ -235,6 +235,7 @@ void procfs_scan_listen(struct ctx *c, uint8_t proto, int ip_version, int ns,
 			uint8_t *map, uint8_t *exclude);
 void drop_caps(void);
 void check_root(uid_t *uid, gid_t *gid);
+void drop_root(uid_t uid, gid_t gid);
 int ns_enter(const struct ctx *c);
 void write_pidfile(int fd, pid_t pid);
 int __daemon(int pidfile_fd, int devnull_fd);
-- 
@@ -235,6 +235,7 @@ void procfs_scan_listen(struct ctx *c, uint8_t proto, int ip_version, int ns,
 			uint8_t *map, uint8_t *exclude);
 void drop_caps(void);
 void check_root(uid_t *uid, gid_t *gid);
+void drop_root(uid_t uid, gid_t gid);
 int ns_enter(const struct ctx *c);
 void write_pidfile(int fd, pid_t pid);
 int __daemon(int pidfile_fd, int devnull_fd);
-- 
2.37.3


^ permalink raw reply related	[flat|nested] 33+ messages in thread

* [PATCH v2 03/10] Consolidate determination of UID/GID to run as
  2022-09-08  3:58 [PATCH v2 00/10] Clean up handling of userns David Gibson
  2022-09-08  3:58 ` [PATCH v2 01/10] Don't store UID & GID persistently in the context structure David Gibson
  2022-09-08  3:58 ` [PATCH v2 02/10] Split checking for root from dropping root privilege David Gibson
@ 2022-09-08  3:59 ` David Gibson
  2022-09-09 14:33   ` Stefano Brivio
  2022-09-08  3:59 ` [PATCH v2 04/10] Safer handling if we can't open /proc/self/uid_map David Gibson
                   ` (7 subsequent siblings)
  10 siblings, 1 reply; 33+ messages in thread
From: David Gibson @ 2022-09-08  3:59 UTC (permalink / raw)
  To: passt-dev

[-- Attachment #1: Type: text/plain, Size: 5283 bytes --]

Currently the logic to work out what UID and GID we will run as is spread
across conf().  If --runas is specified it's handled in conf_runas(),
otherwise it's handled by check_root(), which depends on initialization of
the uid and gid variables by either conf() itself or conf_runas().

Make this clearer by putting all the UID and GID logic into a single
conf_ugid() function.

Signed-off-by: David Gibson <david(a)gibson.dropbear.id.au>
---
 conf.c | 81 ++++++++++++++++++++++++++++++++++++++++++++++++++++------
 util.c | 50 ------------------------------------
 util.h |  1 -
 3 files changed, 73 insertions(+), 59 deletions(-)

diff --git a/conf.c b/conf.c
index 545f61d..5c293b5 100644
--- a/conf.c
+++ b/conf.c
@@ -1021,6 +1021,70 @@ static int conf_runas(const char *opt, unsigned int *uid, unsigned int *gid)
 #endif /* !GLIBC_NO_STATIC_NSS */
 }
 
+/**
+ * conf_ugid() - Determine UID and GID to run as
+ * @runas:	--runas option, may be NULL
+ * @uid:	User ID, set on success
+ * @gid:	Group ID, set on success
+ *
+ * Return: 0 on success, negative error code on failure
+ */
+static int conf_ugid(const char *runas, uid_t *uid, gid_t *gid)
+{
+	const char root_uid_map[] = "         0          0 4294967295";
+	struct passwd *pw;
+	char buf[BUFSIZ];
+	int ret;
+	int fd;
+
+	/* If user has specified --runas, that takes precedence */
+	if (runas) {
+		ret = conf_runas(runas, uid, gid);
+		if (ret)
+			err("Invalid --runas option: %s", runas);
+		return ret;
+	}
+
+	/* Otherwise default to current user and group.. */
+	*uid = geteuid();
+	*gid = getegid();
+
+	/* ..as long as it's not root.. */
+	if (*uid)
+		return 0;
+
+	/* ..or at least not root in the init namespace.. */
+	if ((fd = open("/proc/self/uid_map", O_RDONLY | O_CLOEXEC)) < 0)
+		return 0;
+
+	if (read(fd, buf, BUFSIZ) != sizeof(root_uid_map) ||
+	    strncmp(buf, root_uid_map, sizeof(root_uid_map) - 1)) {
+		close(fd);
+		return 0;
+	}
+
+	close(fd);
+
+	/* ..otherwise use nobody:nobody */
+	warn("Don't run as root. Changing to nobody...");
+#ifndef GLIBC_NO_STATIC_NSS
+	pw = getpwnam("nobody");
+	if (!pw) {
+		perror("getpwnam");
+		exit(EXIT_FAILURE);
+	}
+
+	*uid = pw->pw_uid;
+	*gid = pw->pw_gid;
+#else
+	(void)pw;
+
+	/* Common value for 'nobody', not really specified */
+	*uid = *gid = 65534;
+#endif
+	return 0;
+}
+
 /**
  * conf() - Process command-line arguments and set configuration
  * @c:		Execution context
@@ -1085,9 +1149,10 @@ void conf(struct ctx *c, int argc, char **argv)
 	struct fqdn *dnss = c->dns_search;
 	uint32_t *dns4 = c->ip4.dns;
 	int name, ret, mask, b, i;
+	const char *runas = NULL;
 	unsigned int ifi = 0;
-	uid_t uid = 0;
-	gid_t gid = 0;
+	uid_t uid;
+	gid_t gid;
 
 	if (c->mode == MODE_PASTA)
 		c->no_dhcp_dns = c->no_dhcp_dns_search = 1;
@@ -1210,15 +1275,12 @@ void conf(struct ctx *c, int argc, char **argv)
 			c->trace = c->debug = c->foreground = 1;
 			break;
 		case 12:
-			if (uid || gid) {
+			if (runas) {
 				err("Multiple --runas options given");
 				usage(argv[0]);
 			}
 
-			if (conf_runas(optarg, &uid, &gid)) {
-				err("Invalid --runas option: %s", optarg);
-				usage(argv[0]);
-			}
+			runas = optarg;
 			break;
 		case 'd':
 			if (c->debug) {
@@ -1499,7 +1561,10 @@ void conf(struct ctx *c, int argc, char **argv)
 		}
 	} while (name != -1);
 
-	check_root(&uid, &gid);
+	ret = conf_ugid(runas, &uid, &gid);
+	if (ret)
+		usage(argv[0]);
+
 	drop_root(uid, gid);
 
 	if (c->mode == MODE_PASTA) {
diff --git a/util.c b/util.c
index 17595c1..654410f 100644
--- a/util.c
+++ b/util.c
@@ -482,56 +482,6 @@ void drop_caps(void)
 	}
 }
 
-/**
- * check_root() - Check if root in init ns, exit if we can't drop to user
- */
-void check_root(uid_t *uid, gid_t *gid)
-{
-	const char root_uid_map[] = "         0          0 4294967295";
-	struct passwd *pw;
-	char buf[BUFSIZ];
-	int fd;
-
-	if (!*uid)
-		*uid = geteuid();
-
-	if (!*gid)
-		*gid = getegid();
-
-	if (*uid)
-		return;
-
-	if ((fd = open("/proc/self/uid_map", O_RDONLY | O_CLOEXEC)) < 0)
-		return;
-
-	if (read(fd, buf, BUFSIZ) != sizeof(root_uid_map) ||
-	    strncmp(buf, root_uid_map, sizeof(root_uid_map) - 1)) {
-		close(fd);
-		return;
-	}
-
-	close(fd);
-
-	if (!*uid) {
-		fprintf(stderr, "Don't run as root. Changing to nobody...\n");
-#ifndef GLIBC_NO_STATIC_NSS
-		pw = getpwnam("nobody");
-		if (!pw) {
-			perror("getpwnam");
-			exit(EXIT_FAILURE);
-		}
-
-		*uid = pw->pw_uid;
-		*gid = pw->pw_gid;
-#else
-		(void)pw;
-
-		/* Common value for 'nobody', not really specified */
-		*uid = *gid = 65534;
-#endif
-	}
-}
-
 /**
  * drop_root() - Switch to given UID and GID
  */
diff --git a/util.h b/util.h
index e2f686b..9626cb5 100644
--- a/util.h
+++ b/util.h
@@ -234,7 +234,6 @@ char *line_read(char *buf, size_t len, int fd);
 void procfs_scan_listen(struct ctx *c, uint8_t proto, int ip_version, int ns,
 			uint8_t *map, uint8_t *exclude);
 void drop_caps(void);
-void check_root(uid_t *uid, gid_t *gid);
 void drop_root(uid_t uid, gid_t gid);
 int ns_enter(const struct ctx *c);
 void write_pidfile(int fd, pid_t pid);
-- 
@@ -234,7 +234,6 @@ char *line_read(char *buf, size_t len, int fd);
 void procfs_scan_listen(struct ctx *c, uint8_t proto, int ip_version, int ns,
 			uint8_t *map, uint8_t *exclude);
 void drop_caps(void);
-void check_root(uid_t *uid, gid_t *gid);
 void drop_root(uid_t uid, gid_t gid);
 int ns_enter(const struct ctx *c);
 void write_pidfile(int fd, pid_t pid);
-- 
2.37.3


^ permalink raw reply related	[flat|nested] 33+ messages in thread

* [PATCH v2 04/10] Safer handling if we can't open /proc/self/uid_map
  2022-09-08  3:58 [PATCH v2 00/10] Clean up handling of userns David Gibson
                   ` (2 preceding siblings ...)
  2022-09-08  3:59 ` [PATCH v2 03/10] Consolidate determination of UID/GID to run as David Gibson
@ 2022-09-08  3:59 ` David Gibson
  2022-09-09 14:33   ` Stefano Brivio
  2022-09-08  3:59 ` [PATCH v2 05/10] Move self-isolation code into a separate file David Gibson
                   ` (6 subsequent siblings)
  10 siblings, 1 reply; 33+ messages in thread
From: David Gibson @ 2022-09-08  3:59 UTC (permalink / raw)
  To: passt-dev

[-- Attachment #1: Type: text/plain, Size: 1524 bytes --]

passt is allowed to run as "root" (UID 0) in a user namespace, but notas
real root in the init namespace.  We read /proc/self/uid_map to determine
if we're in the init namespace or not.

If we're unable to open /proc/self/uid_map we assume we're ok and continue
running as UID 0.  This seems unwise: AFAIK the only instance in which
uid_map won't be available is if we're running on a kernel which doesn't
support user namespaces, in which case we won't be able to sandbox
ourselves as we want and fail anyway.  If there are other circumstances
where it can't be opened it seems marginally more likely that we *are*
in the init namespace.

Therefore, fail with an error in this case, instead of carrying on.

Signed-off-by: David Gibson <david(a)gibson.dropbear.id.au>
---
 conf.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/conf.c b/conf.c
index 5c293b5..f1aaa8a 100644
--- a/conf.c
+++ b/conf.c
@@ -1054,8 +1054,12 @@ static int conf_ugid(const char *runas, uid_t *uid, gid_t *gid)
 		return 0;
 
 	/* ..or at least not root in the init namespace.. */
-	if ((fd = open("/proc/self/uid_map", O_RDONLY | O_CLOEXEC)) < 0)
-		return 0;
+	if ((fd = open("/proc/self/uid_map", O_RDONLY | O_CLOEXEC)) < 0) {
+		ret = -errno;
+		err("Can't determine if we're in init namespace: %s",
+		    strerror(-ret));
+		return ret;
+	}
 
 	if (read(fd, buf, BUFSIZ) != sizeof(root_uid_map) ||
 	    strncmp(buf, root_uid_map, sizeof(root_uid_map) - 1)) {
-- 
@@ -1054,8 +1054,12 @@ static int conf_ugid(const char *runas, uid_t *uid, gid_t *gid)
 		return 0;
 
 	/* ..or at least not root in the init namespace.. */
-	if ((fd = open("/proc/self/uid_map", O_RDONLY | O_CLOEXEC)) < 0)
-		return 0;
+	if ((fd = open("/proc/self/uid_map", O_RDONLY | O_CLOEXEC)) < 0) {
+		ret = -errno;
+		err("Can't determine if we're in init namespace: %s",
+		    strerror(-ret));
+		return ret;
+	}
 
 	if (read(fd, buf, BUFSIZ) != sizeof(root_uid_map) ||
 	    strncmp(buf, root_uid_map, sizeof(root_uid_map) - 1)) {
-- 
2.37.3


^ permalink raw reply related	[flat|nested] 33+ messages in thread

* [PATCH v2 05/10] Move self-isolation code into a separate file
  2022-09-08  3:58 [PATCH v2 00/10] Clean up handling of userns David Gibson
                   ` (3 preceding siblings ...)
  2022-09-08  3:59 ` [PATCH v2 04/10] Safer handling if we can't open /proc/self/uid_map David Gibson
@ 2022-09-08  3:59 ` David Gibson
  2022-09-09 14:33   ` Stefano Brivio
  2022-09-08  3:59 ` [PATCH v2 06/10] Consolidate validation of pasta namespace options David Gibson
                   ` (5 subsequent siblings)
  10 siblings, 1 reply; 33+ messages in thread
From: David Gibson @ 2022-09-08  3:59 UTC (permalink / raw)
  To: passt-dev

[-- Attachment #1: Type: text/plain, Size: 12646 bytes --]

passt/pasta contains a number of routines designed to isolate passt from
the rest of the system for security.  These are spread through util.c and
passt.c.  Move them together into a new isolation.c file.

Signed-off-by: David Gibson <david(a)gibson.dropbear.id.au>
---
 Makefile    |   8 +--
 conf.c      |   1 +
 isolation.c | 165 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 isolation.h |  15 +++++
 passt.c     | 113 +----------------------------------
 pasta.c     |   1 +
 util.c      |  49 ----------------
 util.h      |   2 -
 8 files changed, 187 insertions(+), 167 deletions(-)
 create mode 100644 isolation.c
 create mode 100644 isolation.h

diff --git a/Makefile b/Makefile
index 644a541..af3d1ff 100644
--- a/Makefile
+++ b/Makefile
@@ -32,16 +32,16 @@ CFLAGS += -DRLIMIT_STACK_VAL=$(RLIMIT_STACK_VAL)
 CFLAGS += -DARCH=\"$(TARGET_ARCH)\"
 
 PASST_SRCS = arch.c arp.c checksum.c conf.c dhcp.c dhcpv6.c icmp.c igmp.c \
-	lineread.c mld.c ndp.c netlink.c packet.c passt.c pasta.c pcap.c \
-	siphash.c tap.c tcp.c tcp_splice.c udp.c util.c
+	isolation.c lineread.c mld.c ndp.c netlink.c packet.c passt.c pasta.c \
+	pcap.c siphash.c tap.c tcp.c tcp_splice.c udp.c util.c
 QRAP_SRCS = qrap.c
 SRCS = $(PASST_SRCS) $(QRAP_SRCS)
 
 MANPAGES = passt.1 pasta.1 qrap.1
 
 PASST_HEADERS = arch.h arp.h checksum.h conf.h dhcp.h dhcpv6.h icmp.h \
-	lineread.h ndp.h netlink.h packet.h passt.h pasta.h pcap.h \
-	siphash.h tap.h tcp.h tcp_splice.h udp.h util.h
+	isolation.h lineread.h ndp.h netlink.h packet.h passt.h pasta.h \
+	pcap.h siphash.h tap.h tcp.h tcp_splice.h udp.h util.h
 HEADERS = $(PASST_HEADERS)
 
 # On gcc 11.2, with -O2 and -flto, tcp_hash() and siphash_20b(), if inlined,
diff --git a/conf.c b/conf.c
index f1aaa8a..08a2106 100644
--- a/conf.c
+++ b/conf.c
@@ -40,6 +40,7 @@
 #include "tcp.h"
 #include "pasta.h"
 #include "lineread.h"
+#include "isolation.h"
 
 /**
  * get_bound_ports() - Get maps of ports with bound sockets
diff --git a/isolation.c b/isolation.c
new file mode 100644
index 0000000..bc8240f
--- /dev/null
+++ b/isolation.c
@@ -0,0 +1,165 @@
+// SPDX-License-Identifier: AGPL-3.0-or-later
+
+/* PASST - Plug A Simple Socket Transport
+ *  for qemu/UNIX domain socket mode
+ *
+ * PASTA - Pack A Subtle Tap Abstraction
+ *  for network namespace/tap device mode
+ *
+ * isolation.c - Self isolation helpers
+ *
+ * Copyright Red Hat
+ * Author: Stefano Brivio <sbrivio(a)redhat.com>
+ * Author: David Gibson <david(a)gibson.dropbear.id.au>
+ */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <grp.h>
+#include <inttypes.h>
+#include <limits.h>
+#include <pwd.h>
+#include <sched.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+#include <unistd.h>
+#include <sys/mount.h>
+#include <sys/prctl.h>
+#include <sys/socket.h>
+#include <sys/syscall.h>
+#include <sys/types.h>
+#include <netinet/in.h>
+#include <netinet/if_ether.h>
+
+#include <linux/audit.h>
+#include <linux/capability.h>
+#include <linux/filter.h>
+#include <linux/seccomp.h>
+
+#include "util.h"
+#include "seccomp.h"
+#include "passt.h"
+#include "isolation.h"
+
+/**
+ * drop_caps() - Drop capabilities we might have except for CAP_NET_BIND_SERVICE
+ */
+void drop_caps(void)
+{
+	int i;
+
+	for (i = 0; i < 64; i++) {
+		if (i == CAP_NET_BIND_SERVICE)
+			continue;
+
+		prctl(PR_CAPBSET_DROP, i, 0, 0, 0);
+	}
+}
+
+/**
+ * drop_root() - Switch to given UID and GID
+ */
+void drop_root(uid_t uid, gid_t gid)
+{
+	if (setgroups(0, NULL)) {
+		/* If we don't start with CAP_SETGID, this will EPERM */
+		if (errno != EPERM) {
+			err("Can't drop supplementary groups: %s",
+			    strerror(errno));
+			exit(EXIT_FAILURE);
+		}
+	}
+
+	if (!setgid(gid) && !setuid(uid))
+		return;
+
+	err("Can't change user/group, exiting");
+	exit(EXIT_FAILURE);
+}
+
+/**
+ * sandbox() - Unshare IPC, mount, PID, UTS, and user namespaces, "unmount" root
+ *
+ * Return: negative error code on failure, zero on success
+ */
+int sandbox(struct ctx *c)
+{
+	int flags = CLONE_NEWIPC | CLONE_NEWNS | CLONE_NEWUTS;
+
+	if (!c->netns_only) {
+		if (c->pasta_userns_fd == -1)
+			flags |= CLONE_NEWUSER;
+		else
+			setns(c->pasta_userns_fd, CLONE_NEWUSER);
+	}
+
+	c->pasta_userns_fd = -1;
+
+	/* If we run in foreground, we have no chance to actually move to a new
+	 * PID namespace. For passt, use CLONE_NEWPID anyway, in case somebody
+	 * ever gets around seccomp profiles -- there's no harm in passing it.
+	 */
+	if (!c->foreground || c->mode == MODE_PASST)
+		flags |= CLONE_NEWPID;
+
+	if (unshare(flags)) {
+		perror("unshare");
+		return -errno;
+	}
+
+	if (mount("", "/", "", MS_UNBINDABLE | MS_REC, NULL)) {
+		perror("mount /");
+		return -errno;
+	}
+
+	if (mount("", TMPDIR, "tmpfs",
+		  MS_NODEV | MS_NOEXEC | MS_NOSUID | MS_RDONLY,
+		  "nr_inodes=2,nr_blocks=0")) {
+		perror("mount tmpfs");
+		return -errno;
+	}
+
+	if (chdir(TMPDIR)) {
+		perror("chdir");
+		return -errno;
+	}
+
+	if (syscall(SYS_pivot_root, ".", ".")) {
+		perror("pivot_root");
+		return -errno;
+	}
+
+	if (umount2(".", MNT_DETACH | UMOUNT_NOFOLLOW)) {
+		perror("umount2");
+		return -errno;
+	}
+
+	drop_caps();	/* Relative to the new user namespace this time. */
+
+	return 0;
+}
+
+/**
+ * seccomp() - Set up seccomp filters depending on mode, won't return on failure
+ * @c:		Execution context
+ */
+void seccomp(const struct ctx *c)
+{
+	struct sock_fprog prog;
+
+	if (c->mode == MODE_PASST) {
+		prog.len = (unsigned short)ARRAY_SIZE(filter_passt);
+		prog.filter = filter_passt;
+	} else {
+		prog.len = (unsigned short)ARRAY_SIZE(filter_pasta);
+		prog.filter = filter_pasta;
+	}
+
+	if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) ||
+	    prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog)) {
+		perror("prctl");
+		exit(EXIT_FAILURE);
+	}
+}
diff --git a/isolation.h b/isolation.h
new file mode 100644
index 0000000..2540a35
--- /dev/null
+++ b/isolation.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: AGPL-3.0-or-later
+ * Copyright Red Hat
+ * Author: Stefano Brivio <sbrivio(a)redhat.com>
+ * Author: David Gibson <david(a)gibson.dropbear.id.au>
+ */
+
+#ifndef ISOLATION_H
+#define ISOLATION_H
+
+void drop_caps(void);
+void drop_root(uid_t uid, gid_t gid);
+int sandbox(struct ctx *c);
+void seccomp(const struct ctx *c);
+
+#endif /* ISOLATION_H */
diff --git a/passt.c b/passt.c
index bbf53d9..2a8314c 100644
--- a/passt.c
+++ b/passt.c
@@ -19,51 +19,25 @@
  * created in a separate network namespace).
  */
 
-#include <sched.h>
-#include <stdio.h>
 #include <sys/epoll.h>
-#include <sys/socket.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <dirent.h>
 #include <fcntl.h>
 #include <sys/mman.h>
 #include <sys/resource.h>
-#include <sys/uio.h>
-#include <sys/syscall.h>
-#include <sys/wait.h>
-#include <sys/mount.h>
-#include <netinet/ip.h>
-#include <net/ethernet.h>
-#include <libgen.h>
 #include <stdlib.h>
 #include <unistd.h>
-#include <net/if.h>
 #include <netdb.h>
 #include <string.h>
 #include <errno.h>
 #include <time.h>
 #include <syslog.h>
-#include <sys/stat.h>
 #include <sys/prctl.h>
-#include <stddef.h>
-#include <netinet/udp.h>
-#include <netinet/tcp.h>
 #include <netinet/if_ether.h>
 
-#include <linux/seccomp.h>
-#include <linux/audit.h>
-#include <linux/filter.h>
-#include <linux/icmpv6.h>
-
 #include "util.h"
-#include "seccomp.h"
 #include "passt.h"
 #include "dhcp.h"
 #include "dhcpv6.h"
-#include "icmp.h"
-#include "tcp.h"
-#include "udp.h"
+#include "isolation.h"
 #include "pcap.h"
 #include "tap.h"
 #include "conf.h"
@@ -166,91 +140,6 @@ void proto_update_l2_buf(const unsigned char *eth_d, const unsigned char *eth_s,
 	udp_update_l2_buf(eth_d, eth_s, ip_da);
 }
 
-/**
- * seccomp() - Set up seccomp filters depending on mode, won't return on failure
- * @c:		Execution context
- */
-static void seccomp(const struct ctx *c)
-{
-	struct sock_fprog prog;
-
-	if (c->mode == MODE_PASST) {
-		prog.len = (unsigned short)ARRAY_SIZE(filter_passt);
-		prog.filter = filter_passt;
-	} else {
-		prog.len = (unsigned short)ARRAY_SIZE(filter_pasta);
-		prog.filter = filter_pasta;
-	}
-
-	if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) ||
-	    prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog)) {
-		perror("prctl");
-		exit(EXIT_FAILURE);
-	}
-}
-
-/**
- * sandbox() - Unshare IPC, mount, PID, UTS, and user namespaces, "unmount" root
- *
- * Return: negative error code on failure, zero on success
- */
-static int sandbox(struct ctx *c)
-{
-	int flags = CLONE_NEWIPC | CLONE_NEWNS | CLONE_NEWUTS;
-
-	if (!c->netns_only) {
-		if (c->pasta_userns_fd == -1)
-			flags |= CLONE_NEWUSER;
-		else
-			setns(c->pasta_userns_fd, CLONE_NEWUSER);
-	}
-
-	c->pasta_userns_fd = -1;
-
-	/* If we run in foreground, we have no chance to actually move to a new
-	 * PID namespace. For passt, use CLONE_NEWPID anyway, in case somebody
-	 * ever gets around seccomp profiles -- there's no harm in passing it.
-	 */
-	if (!c->foreground || c->mode == MODE_PASST)
-		flags |= CLONE_NEWPID;
-
-	if (unshare(flags)) {
-		perror("unshare");
-		return -errno;
-	}
-
-	if (mount("", "/", "", MS_UNBINDABLE | MS_REC, NULL)) {
-		perror("mount /");
-		return -errno;
-	}
-
-	if (mount("", TMPDIR, "tmpfs",
-		  MS_NODEV | MS_NOEXEC | MS_NOSUID | MS_RDONLY,
-		  "nr_inodes=2,nr_blocks=0")) {
-		perror("mount tmpfs");
-		return -errno;
-	}
-
-	if (chdir(TMPDIR)) {
-		perror("chdir");
-		return -errno;
-	}
-
-	if (syscall(SYS_pivot_root, ".", ".")) {
-		perror("pivot_root");
-		return -errno;
-	}
-
-	if (umount2(".", MNT_DETACH | UMOUNT_NOFOLLOW)) {
-		perror("umount2");
-		return -errno;
-	}
-
-	drop_caps();	/* Relative to the new user namespace this time. */
-
-	return 0;
-}
-
 /**
  * exit_handler() - Signal handler for SIGQUIT and SIGTERM
  * @unused:	Unused, handler deals with SIGQUIT and SIGTERM only
diff --git a/pasta.c b/pasta.c
index a844af2..0bdb655 100644
--- a/pasta.c
+++ b/pasta.c
@@ -40,6 +40,7 @@
 
 #include "util.h"
 #include "passt.h"
+#include "isolation.h"
 #include "netlink.h"
 
 /* PID of child, in case we created a namespace */
diff --git a/util.c b/util.c
index 654410f..f709838 100644
--- a/util.c
+++ b/util.c
@@ -13,30 +13,17 @@
  */
 
 #include <sched.h>
-#include <stdio.h>
-#include <stdint.h>
-#include <stddef.h>
 #include <stdlib.h>
 #include <unistd.h>
 #include <arpa/inet.h>
 #include <net/ethernet.h>
-#include <net/if.h>
-#include <netinet/tcp.h>
-#include <netinet/udp.h>
 #include <sys/epoll.h>
-#include <sys/prctl.h>
-#include <sys/types.h>
-#include <sys/stat.h>
 #include <fcntl.h>
 #include <syslog.h>
 #include <stdarg.h>
 #include <string.h>
 #include <time.h>
 #include <errno.h>
-#include <pwd.h>
-#include <grp.h>
-
-#include <linux/capability.h>
 
 #include "util.h"
 #include "passt.h"
@@ -467,42 +454,6 @@ void procfs_scan_listen(struct ctx *c, uint8_t proto, int ip_version, int ns,
 	}
 }
 
-/**
- * drop_caps() - Drop capabilities we might have except for CAP_NET_BIND_SERVICE
- */
-void drop_caps(void)
-{
-	int i;
-
-	for (i = 0; i < 64; i++) {
-		if (i == CAP_NET_BIND_SERVICE)
-			continue;
-
-		prctl(PR_CAPBSET_DROP, i, 0, 0, 0);
-	}
-}
-
-/**
- * drop_root() - Switch to given UID and GID
- */
-void drop_root(uid_t uid, gid_t gid)
-{
-	if (setgroups(0, NULL)) {
-		/* If we don't start with CAP_SETGID, this will EPERM */
-		if (errno != EPERM) {
-			err("Can't drop supplementary groups: %s",
-			    strerror(errno));
-			exit(EXIT_FAILURE);
-		}
-	}
-
-	if (!setgid(gid) && !setuid(uid))
-		return;
-
-	err("Can't change user/group, exiting");
-	exit(EXIT_FAILURE);
-}
-
 /**
  * ns_enter() - Enter configured user (unless already joined) and network ns
  * @c:		Execution context
diff --git a/util.h b/util.h
index 9626cb5..1003303 100644
--- a/util.h
+++ b/util.h
@@ -233,8 +233,6 @@ int bitmap_isset(const uint8_t *map, int bit);
 char *line_read(char *buf, size_t len, int fd);
 void procfs_scan_listen(struct ctx *c, uint8_t proto, int ip_version, int ns,
 			uint8_t *map, uint8_t *exclude);
-void drop_caps(void);
-void drop_root(uid_t uid, gid_t gid);
 int ns_enter(const struct ctx *c);
 void write_pidfile(int fd, pid_t pid);
 int __daemon(int pidfile_fd, int devnull_fd);
-- 
@@ -233,8 +233,6 @@ int bitmap_isset(const uint8_t *map, int bit);
 char *line_read(char *buf, size_t len, int fd);
 void procfs_scan_listen(struct ctx *c, uint8_t proto, int ip_version, int ns,
 			uint8_t *map, uint8_t *exclude);
-void drop_caps(void);
-void drop_root(uid_t uid, gid_t gid);
 int ns_enter(const struct ctx *c);
 void write_pidfile(int fd, pid_t pid);
 int __daemon(int pidfile_fd, int devnull_fd);
-- 
2.37.3


^ permalink raw reply related	[flat|nested] 33+ messages in thread

* [PATCH v2 06/10] Consolidate validation of pasta namespace options
  2022-09-08  3:58 [PATCH v2 00/10] Clean up handling of userns David Gibson
                   ` (4 preceding siblings ...)
  2022-09-08  3:59 ` [PATCH v2 05/10] Move self-isolation code into a separate file David Gibson
@ 2022-09-08  3:59 ` David Gibson
  2022-09-08  3:59 ` [PATCH v2 07/10] Clean up and rename conf_ns_open() David Gibson
                   ` (4 subsequent siblings)
  10 siblings, 0 replies; 33+ messages in thread
From: David Gibson @ 2022-09-08  3:59 UTC (permalink / raw)
  To: passt-dev

[-- Attachment #1: Type: text/plain, Size: 5583 bytes --]

There are a number of different ways to specify namespaces for pasta to
use.  Some combinations are valid and some are not.  Currently validation
for these is spread across several places: conf_ns_pid() validates PID
options specifically.  Near its callsite in conf() several other checks
are made. Some additional checks are made in conf_ns_open() and finally
theres a check just before the call to pasta_start_ns().

This is quite hard to follow.  Make it easier by putting all the validation
logic together in a new conf_pasta_ns() function, which subsumes
conf_ns_pid().  This reveals that some of the checks were redundant with
each other, so remove those.

For good measure, rename conf_netns() to conf_netns_opt() to make it
clearer its handling just the --netns option specifically, not overall
configuration of the netns.

Signed-off-by: David Gibson <david(a)gibson.dropbear.id.au>
---
 conf.c | 83 +++++++++++++++++++++++++++++-----------------------------
 1 file changed, 42 insertions(+), 41 deletions(-)

diff --git a/conf.c b/conf.c
index 08a2106..5018794 100644
--- a/conf.c
+++ b/conf.c
@@ -491,13 +491,13 @@ out:
 }
 
 /**
- * conf_netns() - Parse --netns option
+ * conf_netns_opt() - Parse --netns option
  * @netns:	buffer of size PATH_MAX, updated with netns path
  * @arg:	--netns argument
  *
  * Return: 0 on success, negative error code otherwise
  */
-static int conf_netns(char *netns, const char *arg)
+static int conf_netns_opt(char *netns, const char *arg)
 {
 	int ret;
 
@@ -518,40 +518,59 @@ static int conf_netns(char *netns, const char *arg)
 }
 
 /**
- * conf_ns_pid() - Parse non-option argument as a PID
+ * conf_pasta_ns() - Validate all pasta namespace options
+ * @netns_only:	Don't use userns, may be updated
  * @userns:	buffer of size PATH_MAX, initially contains --userns
  *		argument (may be empty), updated with userns path
  * @netns:	buffer of size PATH_MAX, initial contains --netns
  *		argument (may be empty), updated with netns path
- * @arg:	PID of network namespace
+ * @optind:	Index of first non-option argument
+ * @argc:	Number of arguments
+ * @argv:	Command line arguments
  *
  * Return: 0 on success, negative error code otherwise
  */
-static int conf_ns_pid(char *userns, char *netns, const char *arg)
+static int conf_pasta_ns(int *netns_only, char *userns, char *netns,
+			 int optind, int argc, char *argv[])
 {
-	char *endptr;
-	long pidval;
+	if (*netns_only && *userns) {
+		err("Both --userns and --netns-only given");
+		return -EINVAL;
+	}
 
-	if (*netns) {
-		err("Both --netns and PID given");
+	if (*netns && optind != argc) {
+		err("Both --netns and PID or command given");
 		return -EINVAL;
 	}
 
-	pidval = strtol(arg, &endptr, 10);
-	if (!*endptr) {
-		/* Looks like a pid */
-		if (pidval < 0 || pidval > INT_MAX) {
-			err("Invalid PID %s", arg);
-			return -EINVAL;
+	if (optind + 1 == argc) {
+		char *endptr;
+		long pidval;
+
+		pidval = strtol(argv[optind], &endptr, 10);
+		if (!*endptr) {
+			/* Looks like a pid */
+			if (pidval < 0 || pidval > INT_MAX) {
+				err("Invalid PID %s", argv[optind]);
+				return -EINVAL;
+			}
+
+			snprintf(netns, PATH_MAX, "/proc/%ld/ns/net", pidval);
+			if (!*userns)
+				snprintf(userns, PATH_MAX, "/proc/%ld/ns/user",
+					 pidval);
 		}
+	}
 
-		snprintf(netns, PATH_MAX, "/proc/%ld/ns/net", pidval);
-		if (!*userns)
-			snprintf(userns, PATH_MAX, "/proc/%ld/ns/user", pidval);
-		return 0;
+	if (*userns && !*netns) {
+		err("--userns requires --netns or PID");
+		return -EINVAL;
 	}
 
-	/* Not a PID, later code will treat as a command */
+	/* Attaching to a netns/PID, with no userns given */
+	if (*netns && !*userns)
+		*netns_only = 1;
+
 	return 0;
 }
 
@@ -585,11 +604,6 @@ static int conf_ns_open(struct ctx *c, const char *userns, const char *netns)
 {
 	int ufd = -1, nfd = -1;
 
-	if (c->netns_only && *userns) {
-		err("Both --userns and --netns-only given");
-		return -EINVAL;
-	}
-
 	nfd = open(netns, O_RDONLY | O_CLOEXEC);
 	if (nfd < 0) {
 		err("Couldn't open network namespace %s", netns);
@@ -607,7 +621,6 @@ static int conf_ns_open(struct ctx *c, const char *userns, const char *netns)
 
 	c->pasta_netns_fd = nfd;
 	c->pasta_userns_fd = ufd;
-	c->netns_only = !*userns;
 
 	NS_CALL(conf_ns_check, c);
 
@@ -1194,7 +1207,7 @@ void conf(struct ctx *c, int argc, char **argv)
 				usage(argv[0]);
 			}
 
-			ret = conf_netns(netns, optarg);
+			ret = conf_netns_opt(netns, optarg);
 			if (ret < 0)
 				usage(argv[0]);
 			break;
@@ -1573,17 +1586,9 @@ void conf(struct ctx *c, int argc, char **argv)
 	drop_root(uid, gid);
 
 	if (c->mode == MODE_PASTA) {
-		if (*netns && optind != argc) {
-			err("Both --netns and PID or command given");
-			usage(argv[0]);
-		} else if (optind + 1 == argc) {
-			ret = conf_ns_pid(userns, netns, argv[optind]);
-			if (ret < 0)
-				usage(argv[0]);
-		} else if (*userns && !*netns && optind == argc) {
-			err("--userns requires --netns or PID");
+		if (conf_pasta_ns(&c->netns_only, userns, netns,
+				  optind, argc, argv) < 0)
 			usage(argv[0]);
-		}
 	} else if (optind != argc) {
 		usage(argv[0]);
 	}
@@ -1597,10 +1602,6 @@ void conf(struct ctx *c, int argc, char **argv)
 			if (ret < 0)
 				usage(argv[0]);
 		} else {
-			if (*userns) {
-				err("Both --userns and command given");
-				usage(argv[0]);
-			}
 			pasta_start_ns(c, argc - optind, argv + optind);
 		}
 	}
-- 
@@ -491,13 +491,13 @@ out:
 }
 
 /**
- * conf_netns() - Parse --netns option
+ * conf_netns_opt() - Parse --netns option
  * @netns:	buffer of size PATH_MAX, updated with netns path
  * @arg:	--netns argument
  *
  * Return: 0 on success, negative error code otherwise
  */
-static int conf_netns(char *netns, const char *arg)
+static int conf_netns_opt(char *netns, const char *arg)
 {
 	int ret;
 
@@ -518,40 +518,59 @@ static int conf_netns(char *netns, const char *arg)
 }
 
 /**
- * conf_ns_pid() - Parse non-option argument as a PID
+ * conf_pasta_ns() - Validate all pasta namespace options
+ * @netns_only:	Don't use userns, may be updated
  * @userns:	buffer of size PATH_MAX, initially contains --userns
  *		argument (may be empty), updated with userns path
  * @netns:	buffer of size PATH_MAX, initial contains --netns
  *		argument (may be empty), updated with netns path
- * @arg:	PID of network namespace
+ * @optind:	Index of first non-option argument
+ * @argc:	Number of arguments
+ * @argv:	Command line arguments
  *
  * Return: 0 on success, negative error code otherwise
  */
-static int conf_ns_pid(char *userns, char *netns, const char *arg)
+static int conf_pasta_ns(int *netns_only, char *userns, char *netns,
+			 int optind, int argc, char *argv[])
 {
-	char *endptr;
-	long pidval;
+	if (*netns_only && *userns) {
+		err("Both --userns and --netns-only given");
+		return -EINVAL;
+	}
 
-	if (*netns) {
-		err("Both --netns and PID given");
+	if (*netns && optind != argc) {
+		err("Both --netns and PID or command given");
 		return -EINVAL;
 	}
 
-	pidval = strtol(arg, &endptr, 10);
-	if (!*endptr) {
-		/* Looks like a pid */
-		if (pidval < 0 || pidval > INT_MAX) {
-			err("Invalid PID %s", arg);
-			return -EINVAL;
+	if (optind + 1 == argc) {
+		char *endptr;
+		long pidval;
+
+		pidval = strtol(argv[optind], &endptr, 10);
+		if (!*endptr) {
+			/* Looks like a pid */
+			if (pidval < 0 || pidval > INT_MAX) {
+				err("Invalid PID %s", argv[optind]);
+				return -EINVAL;
+			}
+
+			snprintf(netns, PATH_MAX, "/proc/%ld/ns/net", pidval);
+			if (!*userns)
+				snprintf(userns, PATH_MAX, "/proc/%ld/ns/user",
+					 pidval);
 		}
+	}
 
-		snprintf(netns, PATH_MAX, "/proc/%ld/ns/net", pidval);
-		if (!*userns)
-			snprintf(userns, PATH_MAX, "/proc/%ld/ns/user", pidval);
-		return 0;
+	if (*userns && !*netns) {
+		err("--userns requires --netns or PID");
+		return -EINVAL;
 	}
 
-	/* Not a PID, later code will treat as a command */
+	/* Attaching to a netns/PID, with no userns given */
+	if (*netns && !*userns)
+		*netns_only = 1;
+
 	return 0;
 }
 
@@ -585,11 +604,6 @@ static int conf_ns_open(struct ctx *c, const char *userns, const char *netns)
 {
 	int ufd = -1, nfd = -1;
 
-	if (c->netns_only && *userns) {
-		err("Both --userns and --netns-only given");
-		return -EINVAL;
-	}
-
 	nfd = open(netns, O_RDONLY | O_CLOEXEC);
 	if (nfd < 0) {
 		err("Couldn't open network namespace %s", netns);
@@ -607,7 +621,6 @@ static int conf_ns_open(struct ctx *c, const char *userns, const char *netns)
 
 	c->pasta_netns_fd = nfd;
 	c->pasta_userns_fd = ufd;
-	c->netns_only = !*userns;
 
 	NS_CALL(conf_ns_check, c);
 
@@ -1194,7 +1207,7 @@ void conf(struct ctx *c, int argc, char **argv)
 				usage(argv[0]);
 			}
 
-			ret = conf_netns(netns, optarg);
+			ret = conf_netns_opt(netns, optarg);
 			if (ret < 0)
 				usage(argv[0]);
 			break;
@@ -1573,17 +1586,9 @@ void conf(struct ctx *c, int argc, char **argv)
 	drop_root(uid, gid);
 
 	if (c->mode == MODE_PASTA) {
-		if (*netns && optind != argc) {
-			err("Both --netns and PID or command given");
-			usage(argv[0]);
-		} else if (optind + 1 == argc) {
-			ret = conf_ns_pid(userns, netns, argv[optind]);
-			if (ret < 0)
-				usage(argv[0]);
-		} else if (*userns && !*netns && optind == argc) {
-			err("--userns requires --netns or PID");
+		if (conf_pasta_ns(&c->netns_only, userns, netns,
+				  optind, argc, argv) < 0)
 			usage(argv[0]);
-		}
 	} else if (optind != argc) {
 		usage(argv[0]);
 	}
@@ -1597,10 +1602,6 @@ void conf(struct ctx *c, int argc, char **argv)
 			if (ret < 0)
 				usage(argv[0]);
 		} else {
-			if (*userns) {
-				err("Both --userns and command given");
-				usage(argv[0]);
-			}
 			pasta_start_ns(c, argc - optind, argv + optind);
 		}
 	}
-- 
2.37.3


^ permalink raw reply related	[flat|nested] 33+ messages in thread

* [PATCH v2 07/10] Clean up and rename conf_ns_open()
  2022-09-08  3:58 [PATCH v2 00/10] Clean up handling of userns David Gibson
                   ` (5 preceding siblings ...)
  2022-09-08  3:59 ` [PATCH v2 06/10] Consolidate validation of pasta namespace options David Gibson
@ 2022-09-08  3:59 ` David Gibson
  2022-09-08  3:59 ` [PATCH v2 08/10] Correctly handle --netns-only in pasta_start_ns() David Gibson
                   ` (3 subsequent siblings)
  10 siblings, 0 replies; 33+ messages in thread
From: David Gibson @ 2022-09-08  3:59 UTC (permalink / raw)
  To: passt-dev

[-- Attachment #1: Type: text/plain, Size: 5662 bytes --]

conf_ns_open() opens file descriptors for the namespaces pasta needs, but
it doesnt really have anything to do with configuration any more.  For
better clarity, move it to pasta.c and rename it pasta_open_ns().  This
makes the symmetry between it and pasta_start_ns() more clear, since these
represent the two basic ways that pasta can operate, either attaching to
an existing namespace/process or spawning a new one.

Since its no longer validating options, the errors it could return
shouldn't cause a usage message.  Just exit directly with an error instead.

Signed-off-by: David Gibson <david(a)gibson.dropbear.id.au>
---
 conf.c  | 72 +--------------------------------------------------------
 pasta.c | 66 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 pasta.h |  1 +
 3 files changed, 68 insertions(+), 71 deletions(-)

diff --git a/conf.c b/conf.c
index 5018794..7171748 100644
--- a/conf.c
+++ b/conf.c
@@ -20,7 +20,6 @@
 #include <sched.h>
 #include <sys/types.h>
 #include <sys/stat.h>
-#include <libgen.h>
 #include <limits.h>
 #include <grp.h>
 #include <pwd.h>
@@ -574,73 +573,6 @@ static int conf_pasta_ns(int *netns_only, char *userns, char *netns,
 	return 0;
 }
 
-/**
- * conf_ns_check() - Check if we can enter configured namespaces
- * @arg:	Execution context
- *
- * Return: 0
- */
-static int conf_ns_check(void *arg)
-{
-	struct ctx *c = (struct ctx *)arg;
-
-	if ((!c->netns_only && setns(c->pasta_userns_fd, CLONE_NEWUSER)) ||
-	    setns(c->pasta_netns_fd, CLONE_NEWNET))
-		c->pasta_userns_fd = c->pasta_netns_fd = -1;
-
-	return 0;
-
-}
-
-/**
- * conf_ns_open() - Open network, user namespaces descriptors from configuration
- * @c:		Execution context
- * @userns:	--userns argument, can be an empty string
- * @netns:	network namespace path
- *
- * Return: 0 on success, negative error code otherwise
- */
-static int conf_ns_open(struct ctx *c, const char *userns, const char *netns)
-{
-	int ufd = -1, nfd = -1;
-
-	nfd = open(netns, O_RDONLY | O_CLOEXEC);
-	if (nfd < 0) {
-		err("Couldn't open network namespace %s", netns);
-		return -ENOENT;
-	}
-
-	if (!c->netns_only && *userns) {
-		ufd = open(userns, O_RDONLY | O_CLOEXEC);
-		if (ufd < 0) {
-			close(nfd);
-			err("Couldn't open user namespace %s", userns);
-			return -ENOENT;
-		}
-	}
-
-	c->pasta_netns_fd = nfd;
-	c->pasta_userns_fd = ufd;
-
-	NS_CALL(conf_ns_check, c);
-
-	if (c->pasta_netns_fd < 0) {
-		err("Couldn't switch to pasta namespaces");
-		return -ENOENT;
-	}
-
-	if (!c->no_netns_quit) {
-		char buf[PATH_MAX];
-
-		strncpy(buf, netns, PATH_MAX);
-		strncpy(c->netns_base, basename(buf), PATH_MAX - 1);
-		strncpy(buf, netns, PATH_MAX);
-		strncpy(c->netns_dir, dirname(buf), PATH_MAX - 1);
-	}
-
-	return 0;
-}
-
 /**
  * conf_ip4() - Verify or detect IPv4 support, get relevant addresses
  * @ifi:	Host interface to attempt (0 to determine one)
@@ -1598,9 +1530,7 @@ void conf(struct ctx *c, int argc, char **argv)
 
 	if (c->mode == MODE_PASTA) {
 		if (*netns) {
-			ret = conf_ns_open(c, userns, netns);
-			if (ret < 0)
-				usage(argv[0]);
+			pasta_open_ns(c, userns, netns);
 		} else {
 			pasta_start_ns(c, argc - optind, argv + optind);
 		}
diff --git a/pasta.c b/pasta.c
index 0bdb655..0fd45e4 100644
--- a/pasta.c
+++ b/pasta.c
@@ -20,6 +20,7 @@
 #include <stdio.h>
 #include <string.h>
 #include <errno.h>
+#include <libgen.h>
 #include <limits.h>
 #include <stdlib.h>
 #include <stdint.h>
@@ -101,6 +102,71 @@ netns:
 	return 0;
 }
 
+/**
+ * ns_check() - Check if we can enter configured namespaces
+ * @arg:	Execution context
+ *
+ * Return: 0
+ */
+static int ns_check(void *arg)
+{
+	struct ctx *c = (struct ctx *)arg;
+
+	if ((!c->netns_only && setns(c->pasta_userns_fd, CLONE_NEWUSER)) ||
+	    setns(c->pasta_netns_fd, CLONE_NEWNET))
+		c->pasta_userns_fd = c->pasta_netns_fd = -1;
+
+	return 0;
+
+}
+
+/**
+ * pasta_open_ns() - Open network, user namespaces descriptors
+ * @c:		Execution context
+ * @userns:	--userns argument, can be an empty string
+ * @netns:	network namespace path
+ *
+ * Return: 0 on success, negative error code otherwise
+ */
+void pasta_open_ns(struct ctx *c, const char *userns, const char *netns)
+{
+	int ufd = -1, nfd = -1;
+
+	nfd = open(netns, O_RDONLY | O_CLOEXEC);
+	if (nfd < 0) {
+		err("Couldn't open network namespace %s", netns);
+		exit(EXIT_FAILURE);
+	}
+
+	if (!c->netns_only && *userns) {
+		ufd = open(userns, O_RDONLY | O_CLOEXEC);
+		if (ufd < 0) {
+			close(nfd);
+			err("Couldn't open user namespace %s", userns);
+			exit(EXIT_FAILURE);
+		}
+	}
+
+	c->pasta_netns_fd = nfd;
+	c->pasta_userns_fd = ufd;
+
+	NS_CALL(ns_check, c);
+
+	if (c->pasta_netns_fd < 0) {
+		err("Couldn't switch to pasta namespaces");
+		exit(EXIT_FAILURE);
+	}
+
+	if (!c->no_netns_quit) {
+		char buf[PATH_MAX] = { 0 };
+
+		strncpy(buf, netns, PATH_MAX - 1);
+		strncpy(c->netns_base, basename(buf), PATH_MAX - 1);
+		strncpy(buf, netns, PATH_MAX - 1);
+		strncpy(c->netns_dir, dirname(buf), PATH_MAX - 1);
+	}
+}
+
 /**
  * struct pasta_setup_ns_arg - Argument for pasta_setup_ns()
  * @c:		Execution context
diff --git a/pasta.h b/pasta.h
index 19b2e54..a1937b2 100644
--- a/pasta.h
+++ b/pasta.h
@@ -6,6 +6,7 @@
 #ifndef PASTA_H
 #define PASTA_H
 
+void pasta_open_ns(struct ctx *c, const char *userns, const char *netns);
 void pasta_start_ns(struct ctx *c, int argc, char *argv[]);
 void pasta_ns_conf(struct ctx *c);
 void pasta_child_handler(int signal);
-- 
@@ -6,6 +6,7 @@
 #ifndef PASTA_H
 #define PASTA_H
 
+void pasta_open_ns(struct ctx *c, const char *userns, const char *netns);
 void pasta_start_ns(struct ctx *c, int argc, char *argv[]);
 void pasta_ns_conf(struct ctx *c);
 void pasta_child_handler(int signal);
-- 
2.37.3


^ permalink raw reply related	[flat|nested] 33+ messages in thread

* [PATCH v2 08/10] Correctly handle --netns-only in pasta_start_ns()
  2022-09-08  3:58 [PATCH v2 00/10] Clean up handling of userns David Gibson
                   ` (6 preceding siblings ...)
  2022-09-08  3:59 ` [PATCH v2 07/10] Clean up and rename conf_ns_open() David Gibson
@ 2022-09-08  3:59 ` David Gibson
  2022-09-09 14:34   ` Stefano Brivio
  2022-09-08  3:59 ` [PATCH v2 09/10] Handle userns isolation and dropping root at the same time David Gibson
                   ` (2 subsequent siblings)
  10 siblings, 1 reply; 33+ messages in thread
From: David Gibson @ 2022-09-08  3:59 UTC (permalink / raw)
  To: passt-dev

[-- Attachment #1: Type: text/plain, Size: 873 bytes --]

--netns-only is supposed to make pasta use only a network namespace, not
a user namespace.  However, pasta_start_ns() has this backwards, and if
--netns-only is specified it creates a user namespace but *not* a network
namespace.  Correct this.

Signed-off-by: David Gibson <david(a)gibson.dropbear.id.au>
---
 pasta.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pasta.c b/pasta.c
index 0fd45e4..7eac8e9 100644
--- a/pasta.c
+++ b/pasta.c
@@ -244,8 +244,8 @@ void pasta_start_ns(struct ctx *c, int argc, char *argv[])
 
 	pasta_child_pid = clone(pasta_setup_ns,
 				ns_fn_stack + sizeof(ns_fn_stack) / 2,
-				(c->netns_only ? 0 : CLONE_NEWNET) |
-				CLONE_NEWIPC | CLONE_NEWPID | CLONE_NEWUSER |
+				(c->netns_only ? 0 : CLONE_NEWUSER) |
+				CLONE_NEWIPC | CLONE_NEWPID | CLONE_NEWNET |
 				CLONE_NEWUTS,
 				(void *)&arg);
 
-- 
@@ -244,8 +244,8 @@ void pasta_start_ns(struct ctx *c, int argc, char *argv[])
 
 	pasta_child_pid = clone(pasta_setup_ns,
 				ns_fn_stack + sizeof(ns_fn_stack) / 2,
-				(c->netns_only ? 0 : CLONE_NEWNET) |
-				CLONE_NEWIPC | CLONE_NEWPID | CLONE_NEWUSER |
+				(c->netns_only ? 0 : CLONE_NEWUSER) |
+				CLONE_NEWIPC | CLONE_NEWPID | CLONE_NEWNET |
 				CLONE_NEWUTS,
 				(void *)&arg);
 
-- 
2.37.3


^ permalink raw reply related	[flat|nested] 33+ messages in thread

* [PATCH v2 09/10] Handle userns isolation and dropping root at the same time
  2022-09-08  3:58 [PATCH v2 00/10] Clean up handling of userns David Gibson
                   ` (7 preceding siblings ...)
  2022-09-08  3:59 ` [PATCH v2 08/10] Correctly handle --netns-only in pasta_start_ns() David Gibson
@ 2022-09-08  3:59 ` David Gibson
  2022-09-08  3:59 ` [PATCH v2 10/10] Allow --userns when pasta spawns a command David Gibson
  2022-09-09 14:36 ` [PATCH v2 00/10] Clean up handling of userns Stefano Brivio
  10 siblings, 0 replies; 33+ messages in thread
From: David Gibson @ 2022-09-08  3:59 UTC (permalink / raw)
  To: passt-dev

[-- Attachment #1: Type: text/plain, Size: 12918 bytes --]

passt/pasta can interact with user namespaces in a number of ways:
   1) With --netns-only we'll remain in our original user namespace
   2) With --userns or a PID option to pasta we'll join either the given
      user namespace or that of the PID
   3) When pasta spawns a shell or command we'll start a new user namespace
      for the command and then join it
   4) With passt we'll create a new user namespace when we sandbox()
      ourself

However (3) and (4) turn out to have essentially the same effect.  In both
cases we create one new user namespace.  The spawned command starts there,
and passt/pasta itself will live there from sandbox() onwards.

Because of this, we can simplify user namespace handling by moving the
userns handling earlier, to the same point we drop root in the original
namespace.  Extend the drop_user() function to isolate_user() which does
both.

After switching UID and GID in the original userns, isolate_user() will
either join or create the userns we require.  When we spawn a command with
pasta_start_ns()/pasta_setup_ns() we no longer need to create a userns,
because we're already made one.  sandbox() likewise no longer needs to
create (or join) an userns because we're already in the one we need.

We no longer need c->pasta_userns_fd, since the fd is only used locally
in isolate_user().  Likewise we can replace c->netns_only with a local
in conf(), since it's not used outside there.

Signed-off-by: David Gibson <david(a)gibson.dropbear.id.au>
---
 conf.c      | 11 ++++----
 isolation.c | 75 ++++++++++++++++++++++++++++++++++++++++++-----------
 isolation.h |  2 +-
 passt.c     |  3 ++-
 passt.h     |  4 ---
 pasta.c     | 56 +++++----------------------------------
 pasta.h     |  2 +-
 util.c      |  5 ----
 8 files changed, 77 insertions(+), 81 deletions(-)

diff --git a/conf.c b/conf.c
index 7171748..27d520e 100644
--- a/conf.c
+++ b/conf.c
@@ -1043,6 +1043,7 @@ static int conf_ugid(const char *runas, uid_t *uid, gid_t *gid)
  */
 void conf(struct ctx *c, int argc, char **argv)
 {
+	int netns_only = 0;
 	struct option options[] = {
 		{"debug",	no_argument,		NULL,		'd' },
 		{"quiet",	no_argument,		NULL,		'q' },
@@ -1077,7 +1078,7 @@ void conf(struct ctx *c, int argc, char **argv)
 		{"udp-ns",	required_argument,	NULL,		'U' },
 		{"userns",	required_argument,	NULL,		2 },
 		{"netns",	required_argument,	NULL,		3 },
-		{"netns-only",	no_argument,		&c->netns_only,	1 },
+		{"netns-only",	no_argument,		&netns_only,	1 },
 		{"config-net",	no_argument,		&c->pasta_conf_ns, 1 },
 		{"ns-mac-addr",	required_argument,	NULL,		4 },
 		{"dhcp-dns",	no_argument,		NULL,		5 },
@@ -1515,22 +1516,22 @@ void conf(struct ctx *c, int argc, char **argv)
 	if (ret)
 		usage(argv[0]);
 
-	drop_root(uid, gid);
-
 	if (c->mode == MODE_PASTA) {
-		if (conf_pasta_ns(&c->netns_only, userns, netns,
+		if (conf_pasta_ns(&netns_only, userns, netns,
 				  optind, argc, argv) < 0)
 			usage(argv[0]);
 	} else if (optind != argc) {
 		usage(argv[0]);
 	}
 
+	isolate_user(uid, gid, !netns_only, userns);
+
 	if (c->pasta_conf_ns)
 		c->no_ra = 1;
 
 	if (c->mode == MODE_PASTA) {
 		if (*netns) {
-			pasta_open_ns(c, userns, netns);
+			pasta_open_ns(c, netns);
 		} else {
 			pasta_start_ns(c, argc - optind, argv + optind);
 		}
diff --git a/isolation.c b/isolation.c
index bc8240f..f14b154 100644
--- a/isolation.c
+++ b/isolation.c
@@ -20,6 +20,7 @@
 #include <limits.h>
 #include <pwd.h>
 #include <sched.h>
+#include <stdbool.h>
 #include <stddef.h>
 #include <stdlib.h>
 #include <string.h>
@@ -59,12 +60,19 @@ void drop_caps(void)
 }
 
 /**
- * drop_root() - Switch to given UID and GID
+ * isolate_user() - Switch to final UID/GID and move into userns
+ * @uid:	UID to run as (in original userns)
+ * @gid:	GID to run as (in original userns)
+ * @use_userns:	Whether to join or create a userns
+ * @userns:	userns path to enter, may be empty
  */
-void drop_root(uid_t uid, gid_t gid)
+void isolate_user(uid_t uid, gid_t gid, bool use_userns, const char *userns)
 {
+	char nsmap[BUFSIZ];
+
+	/* First set our UID & GID in the original namespace */
 	if (setgroups(0, NULL)) {
-		/* If we don't start with CAP_SETGID, this will EPERM */
+		/* If we don't have CAP_SETGID, this will EPERM */
 		if (errno != EPERM) {
 			err("Can't drop supplementary groups: %s",
 			    strerror(errno));
@@ -72,11 +80,57 @@ void drop_root(uid_t uid, gid_t gid)
 		}
 	}
 
-	if (!setgid(gid) && !setuid(uid))
+	if (setgid(gid) != 0) {
+		err("Can't set GID to %u: %s", gid, strerror(errno));
+		exit(EXIT_FAILURE);
+	}
+
+	if (setuid(uid) != 0) {
+		err("Can't set UID to %u: %s", uid, strerror(errno));
+		exit(EXIT_FAILURE);
+	}
+
+	/* If we're told not to use a userns, nothing more to do */
+	if (!use_userns)
+		return;
+
+	/* Otherwise, if given a userns, join it */
+	if (*userns) {
+		int ufd;
+
+		ufd = open(userns, O_RDONLY | O_CLOEXEC);
+		if (ufd < 0) {
+			err("Couldn't open user namespace %s: %s",
+			    userns, strerror(errno));
+			exit(EXIT_FAILURE);
+		}
+
+		if (setns(ufd, CLONE_NEWUSER) != 0) {
+			err("Couldn't enter user namespace %s: %s",
+			    userns, strerror(errno));
+			exit(EXIT_FAILURE);
+		}
+
+		close(ufd);
+
 		return;
+	}
 
-	err("Can't change user/group, exiting");
-	exit(EXIT_FAILURE);
+	/* Otherwise, create our own userns */
+	if (unshare(CLONE_NEWUSER) != 0) {
+		err("Couldn't create user namespace: %s", strerror(errno));
+		exit(EXIT_FAILURE);
+	}
+
+	/* Configure user and group mappings */
+	snprintf(nsmap, BUFSIZ, "0 %u 1", uid);
+	FWRITE("/proc/self/uid_map", nsmap, "Cannot set uid_map in namespace");
+
+	FWRITE("/proc/self/setgroups", "deny",
+	       "Cannot write to setgroups in namespace");
+
+	snprintf(nsmap, BUFSIZ, "0 %u 1", gid);
+	FWRITE("/proc/self/gid_map", nsmap, "Cannot set gid_map in namespace");
 }
 
 /**
@@ -88,15 +142,6 @@ int sandbox(struct ctx *c)
 {
 	int flags = CLONE_NEWIPC | CLONE_NEWNS | CLONE_NEWUTS;
 
-	if (!c->netns_only) {
-		if (c->pasta_userns_fd == -1)
-			flags |= CLONE_NEWUSER;
-		else
-			setns(c->pasta_userns_fd, CLONE_NEWUSER);
-	}
-
-	c->pasta_userns_fd = -1;
-
 	/* If we run in foreground, we have no chance to actually move to a new
 	 * PID namespace. For passt, use CLONE_NEWPID anyway, in case somebody
 	 * ever gets around seccomp profiles -- there's no harm in passing it.
diff --git a/isolation.h b/isolation.h
index 2540a35..2c73df7 100644
--- a/isolation.h
+++ b/isolation.h
@@ -8,7 +8,7 @@
 #define ISOLATION_H
 
 void drop_caps(void);
-void drop_root(uid_t uid, gid_t gid);
+void isolate_user(uid_t uid, gid_t gid, bool use_userns, const char *userns);
 int sandbox(struct ctx *c);
 void seccomp(const struct ctx *c);
 
diff --git a/passt.c b/passt.c
index 2a8314c..f0ed897 100644
--- a/passt.c
+++ b/passt.c
@@ -23,6 +23,7 @@
 #include <fcntl.h>
 #include <sys/mman.h>
 #include <sys/resource.h>
+#include <stdbool.h>
 #include <stdlib.h>
 #include <unistd.h>
 #include <netdb.h>
@@ -185,7 +186,7 @@ int main(int argc, char **argv)
 
 	drop_caps();
 
-	c.pasta_userns_fd = c.pasta_netns_fd = c.fd_tap = c.fd_tap_listen = -1;
+	c.pasta_netns_fd = c.fd_tap = c.fd_tap_listen = -1;
 
 	sigemptyset(&sa.sa_mask);
 	sa.sa_flags = 0;
diff --git a/passt.h b/passt.h
index 3035430..8c8d710 100644
--- a/passt.h
+++ b/passt.h
@@ -145,8 +145,6 @@ struct ip6_ctx {
  * @pcap:		Path for packet capture file
  * @pid_file:		Path to PID file, empty string if not configured
  * @pasta_netns_fd:	File descriptor for network namespace in pasta mode
- * @pasta_userns_fd:	Descriptor for user namespace to join, -1 once joined
- * @netns_only:		In pasta mode, don't join or create a user namespace
  * @no_netns_quit:	In pasta mode, don't exit if fs-bound namespace is gone
  * @netns_base:		Base name for fs-bound namespace, if any, in pasta mode
  * @netns_dir:		Directory of fs-bound namespace, if any, in pasta mode
@@ -197,8 +195,6 @@ struct ctx {
 	char pid_file[PATH_MAX];
 
 	int pasta_netns_fd;
-	int pasta_userns_fd;
-	int netns_only;
 
 	int no_netns_quit;
 	char netns_base[PATH_MAX];
diff --git a/pasta.c b/pasta.c
index 7eac8e9..e233762 100644
--- a/pasta.c
+++ b/pasta.c
@@ -23,6 +23,7 @@
 #include <libgen.h>
 #include <limits.h>
 #include <stdlib.h>
+#include <stdbool.h>
 #include <stdint.h>
 #include <unistd.h>
 #include <syslog.h>
@@ -83,16 +84,6 @@ static int pasta_wait_for_ns(void *arg)
 	int flags = O_RDONLY | O_CLOEXEC;
 	char ns[PATH_MAX];
 
-	if (c->netns_only)
-		goto netns;
-
-	snprintf(ns, PATH_MAX, "/proc/%i/ns/user", pasta_child_pid);
-	do
-		while ((c->pasta_userns_fd = open(ns, flags)) < 0);
-	while (setns(c->pasta_userns_fd, CLONE_NEWUSER) &&
-	       !close(c->pasta_userns_fd));
-
-netns:
 	snprintf(ns, PATH_MAX, "/proc/%i/ns/net", pasta_child_pid);
 	do
 		while ((c->pasta_netns_fd = open(ns, flags)) < 0);
@@ -112,25 +103,23 @@ static int ns_check(void *arg)
 {
 	struct ctx *c = (struct ctx *)arg;
 
-	if ((!c->netns_only && setns(c->pasta_userns_fd, CLONE_NEWUSER)) ||
-	    setns(c->pasta_netns_fd, CLONE_NEWNET))
-		c->pasta_userns_fd = c->pasta_netns_fd = -1;
+	if (setns(c->pasta_netns_fd, CLONE_NEWNET))
+		c->pasta_netns_fd = -1;
 
 	return 0;
 
 }
 
 /**
- * pasta_open_ns() - Open network, user namespaces descriptors
+ * pasta_open_ns() - Open network namespace descriptors
  * @c:		Execution context
- * @userns:	--userns argument, can be an empty string
  * @netns:	network namespace path
  *
  * Return: 0 on success, negative error code otherwise
  */
-void pasta_open_ns(struct ctx *c, const char *userns, const char *netns)
+void pasta_open_ns(struct ctx *c, const char *netns)
 {
-	int ufd = -1, nfd = -1;
+	int nfd = -1;
 
 	nfd = open(netns, O_RDONLY | O_CLOEXEC);
 	if (nfd < 0) {
@@ -138,17 +127,7 @@ void pasta_open_ns(struct ctx *c, const char *userns, const char *netns)
 		exit(EXIT_FAILURE);
 	}
 
-	if (!c->netns_only && *userns) {
-		ufd = open(userns, O_RDONLY | O_CLOEXEC);
-		if (ufd < 0) {
-			close(nfd);
-			err("Couldn't open user namespace %s", userns);
-			exit(EXIT_FAILURE);
-		}
-	}
-
 	c->pasta_netns_fd = nfd;
-	c->pasta_userns_fd = ufd;
 
 	NS_CALL(ns_check, c);
 
@@ -169,12 +148,9 @@ void pasta_open_ns(struct ctx *c, const char *userns, const char *netns)
 
 /**
  * struct pasta_setup_ns_arg - Argument for pasta_setup_ns()
- * @c:		Execution context
- * @euid:	Effective UID of caller
+ * @argv:	Command and arguments to run
  */
 struct pasta_setup_ns_arg {
-	struct ctx *c;
-	int euid;
 	char **argv;
 };
 
@@ -188,21 +164,6 @@ static int pasta_setup_ns(void *arg)
 {
 	struct pasta_setup_ns_arg *a = (struct pasta_setup_ns_arg *)arg;
 
-	if (!a->c->netns_only) {
-		char buf[BUFSIZ];
-
-		snprintf(buf, BUFSIZ, "%i %i %i", 0, a->euid, 1);
-
-		FWRITE("/proc/self/uid_map", buf,
-		       "Cannot set uid_map in namespace");
-
-		FWRITE("/proc/self/setgroups", "deny",
-		       "Cannot write to setgroups in namespace");
-
-		FWRITE("/proc/self/gid_map", buf,
-		       "Cannot set gid_map in namespace");
-	}
-
 	FWRITE("/proc/sys/net/ipv4/ping_group_range", "0 0",
 	       "Cannot set ping_group_range, ICMP requests might fail");
 
@@ -221,8 +182,6 @@ static int pasta_setup_ns(void *arg)
 void pasta_start_ns(struct ctx *c, int argc, char *argv[])
 {
 	struct pasta_setup_ns_arg arg = {
-		.c = c,
-		.euid = geteuid(),
 		.argv = argv,
 	};
 	char *shell = getenv("SHELL") ? getenv("SHELL") : "/bin/sh";
@@ -244,7 +203,6 @@ void pasta_start_ns(struct ctx *c, int argc, char *argv[])
 
 	pasta_child_pid = clone(pasta_setup_ns,
 				ns_fn_stack + sizeof(ns_fn_stack) / 2,
-				(c->netns_only ? 0 : CLONE_NEWUSER) |
 				CLONE_NEWIPC | CLONE_NEWPID | CLONE_NEWNET |
 				CLONE_NEWUTS,
 				(void *)&arg);
diff --git a/pasta.h b/pasta.h
index a1937b2..02df1f6 100644
--- a/pasta.h
+++ b/pasta.h
@@ -6,7 +6,7 @@
 #ifndef PASTA_H
 #define PASTA_H
 
-void pasta_open_ns(struct ctx *c, const char *userns, const char *netns);
+void pasta_open_ns(struct ctx *c, const char *netns);
 void pasta_start_ns(struct ctx *c, int argc, char *argv[]);
 void pasta_ns_conf(struct ctx *c);
 void pasta_child_handler(int signal);
diff --git a/util.c b/util.c
index f709838..8d26561 100644
--- a/util.c
+++ b/util.c
@@ -464,11 +464,6 @@ void procfs_scan_listen(struct ctx *c, uint8_t proto, int ip_version, int ns,
  */
 int ns_enter(const struct ctx *c)
 {
-	if (!c->netns_only &&
-	    c->pasta_userns_fd != -1 &&
-	    setns(c->pasta_userns_fd, CLONE_NEWUSER))
-		exit(EXIT_FAILURE);
-
 	if (setns(c->pasta_netns_fd, CLONE_NEWNET))
 		exit(EXIT_FAILURE);
 
-- 
@@ -464,11 +464,6 @@ void procfs_scan_listen(struct ctx *c, uint8_t proto, int ip_version, int ns,
  */
 int ns_enter(const struct ctx *c)
 {
-	if (!c->netns_only &&
-	    c->pasta_userns_fd != -1 &&
-	    setns(c->pasta_userns_fd, CLONE_NEWUSER))
-		exit(EXIT_FAILURE);
-
 	if (setns(c->pasta_netns_fd, CLONE_NEWNET))
 		exit(EXIT_FAILURE);
 
-- 
2.37.3


^ permalink raw reply related	[flat|nested] 33+ messages in thread

* [PATCH v2 10/10] Allow --userns when pasta spawns a command
  2022-09-08  3:58 [PATCH v2 00/10] Clean up handling of userns David Gibson
                   ` (8 preceding siblings ...)
  2022-09-08  3:59 ` [PATCH v2 09/10] Handle userns isolation and dropping root at the same time David Gibson
@ 2022-09-08  3:59 ` David Gibson
  2022-09-09 14:34   ` Stefano Brivio
  2022-09-09 14:36 ` [PATCH v2 00/10] Clean up handling of userns Stefano Brivio
  10 siblings, 1 reply; 33+ messages in thread
From: David Gibson @ 2022-09-08  3:59 UTC (permalink / raw)
  To: passt-dev

[-- Attachment #1: Type: text/plain, Size: 929 bytes --]

Currently --userns is only allowed when pasta is attaching to an existing
netns or PID, and is prohibited when creating a new netns by spawning a
command or shell.

With the new handling of userns, this check isn't neccessary.  I'm not sure
if there's any use case for --userns with a spawned command, but it's
strictly more flexible and requires zero extra code, so we might as well.

Signed-off-by: David Gibson <david(a)gibson.dropbear.id.au>
---
 conf.c | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/conf.c b/conf.c
index 27d520e..ec191c2 100644
--- a/conf.c
+++ b/conf.c
@@ -561,11 +561,6 @@ static int conf_pasta_ns(int *netns_only, char *userns, char *netns,
 		}
 	}
 
-	if (*userns && !*netns) {
-		err("--userns requires --netns or PID");
-		return -EINVAL;
-	}
-
 	/* Attaching to a netns/PID, with no userns given */
 	if (*netns && !*userns)
 		*netns_only = 1;
-- 
@@ -561,11 +561,6 @@ static int conf_pasta_ns(int *netns_only, char *userns, char *netns,
 		}
 	}
 
-	if (*userns && !*netns) {
-		err("--userns requires --netns or PID");
-		return -EINVAL;
-	}
-
 	/* Attaching to a netns/PID, with no userns given */
 	if (*netns && !*userns)
 		*netns_only = 1;
-- 
2.37.3


^ permalink raw reply related	[flat|nested] 33+ messages in thread

* Re: [PATCH v2 02/10] Split checking for root from dropping root privilege
  2022-09-08  3:58 ` [PATCH v2 02/10] Split checking for root from dropping root privilege David Gibson
@ 2022-09-09 14:33   ` Stefano Brivio
  2022-09-10  7:09     ` David Gibson
  0 siblings, 1 reply; 33+ messages in thread
From: Stefano Brivio @ 2022-09-09 14:33 UTC (permalink / raw)
  To: passt-dev

[-- Attachment #1: Type: text/plain, Size: 2305 bytes --]

Just some ridiculous nitpicking on this one:

On Thu,  8 Sep 2022 13:58:59 +1000
David Gibson <david(a)gibson.dropbear.id.au> wrote:

> [...]
>
> +++ b/passt.1
> @@ -104,9 +104,10 @@ terminal, and to both system logger and standard error otherwise.
>  
>  .TP
>  .BR \-\-runas " " \fIUID\fR|\fIUID:GID\fR|\fILOGIN\fR|\fILOGIN:GROUP\fR
> -If started as root, change to given UID and corresponding group if UID is given,
> +Attempt to change to given UID and corresponding group if UID is given,
>  or to given UID and given GID if both are given. Alternatively, login name, or
> -login name and group name can be passed.
> +login name and group name can be passed.  This requires privilege (either

I'd change this to "privileges", right?

Also, adding two spaces following a period, as you do, seems to have
some merits:

  https://link.springer.com/article/10.3758/s13414-018-1527-6

  Johnson, R.L., Bui, B. & Schmitt, L.L. Are two spaces better than
  one? The effect of spacing following periods and commas during reading.
  Atten Percept Psychophys 80, 1504–1511 (2018)

...but in man pages, nroff might turn that into three or more spaces,
inconsistently, in a justified paragraph.

I'd stick to one. Or change all of them (I almost never use two, so
here it's all single spaces).

> +initial effective UID 0 or CAP_SETUID capability) to work.
>  Default is to change to user \fInobody\fR if started as root.
>  
>  .TP
> diff --git a/util.c b/util.c
> index b2ccb3d..17595c1 100644
> --- a/util.c
> +++ b/util.c
> @@ -492,7 +492,13 @@ void check_root(uid_t *uid, gid_t *gid)
>  	char buf[BUFSIZ];
>  	int fd;
>  
> -	if (getuid() && geteuid())
> +	if (!*uid)
> +		*uid = geteuid();
> +
> +	if (!*gid)
> +		*gid = getegid();
> +
> +	if (*uid)
>  		return;
>  
>  	if ((fd = open("/proc/self/uid_map", O_RDONLY | O_CLOEXEC)) < 0)
> @@ -524,11 +530,26 @@ void check_root(uid_t *uid, gid_t *gid)
>  		*uid = *gid = 65534;
>  #endif
>  	}
> +}
> +
> +/**
> + * drop_root() - Switch to given UID and GID

I would add the usual:

 * @uid:	User ID to switch to
 * @gid:	Group ID to switch to

even though it's totally obvious here, in case somebody should ever need
to parse this stuff to produce documentation.

-- 
Stefano


^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH v2 03/10] Consolidate determination of UID/GID to run as
  2022-09-08  3:59 ` [PATCH v2 03/10] Consolidate determination of UID/GID to run as David Gibson
@ 2022-09-09 14:33   ` Stefano Brivio
  2022-09-10  7:15     ` David Gibson
  0 siblings, 1 reply; 33+ messages in thread
From: Stefano Brivio @ 2022-09-09 14:33 UTC (permalink / raw)
  To: passt-dev

[-- Attachment #1: Type: text/plain, Size: 2542 bytes --]

Also mere nitpicking on this one:

On Thu,  8 Sep 2022 13:59:00 +1000
David Gibson <david(a)gibson.dropbear.id.au> wrote:

> Currently the logic to work out what UID and GID we will run as is spread
> across conf().  If --runas is specified it's handled in conf_runas(),
> otherwise it's handled by check_root(), which depends on initialization of
> the uid and gid variables by either conf() itself or conf_runas().
> 
> Make this clearer by putting all the UID and GID logic into a single
> conf_ugid() function.
> 
> Signed-off-by: David Gibson <david(a)gibson.dropbear.id.au>
> ---
>  conf.c | 81 ++++++++++++++++++++++++++++++++++++++++++++++++++++------
>  util.c | 50 ------------------------------------
>  util.h |  1 -
>  3 files changed, 73 insertions(+), 59 deletions(-)
> 
> diff --git a/conf.c b/conf.c
> index 545f61d..5c293b5 100644
> --- a/conf.c
> +++ b/conf.c
> @@ -1021,6 +1021,70 @@ static int conf_runas(const char *opt, unsigned int *uid, unsigned int *gid)
>  #endif /* !GLIBC_NO_STATIC_NSS */
>  }
>  
> +/**
> + * conf_ugid() - Determine UID and GID to run as
> + * @runas:	--runas option, may be NULL
> + * @uid:	User ID, set on success
> + * @gid:	Group ID, set on success
> + *
> + * Return: 0 on success, negative error code on failure
> + */
> +static int conf_ugid(const char *runas, uid_t *uid, gid_t *gid)
> +{
> +	const char root_uid_map[] = "         0          0 4294967295";
> +	struct passwd *pw;
> +	char buf[BUFSIZ];
> +	int ret;
> +	int fd;
> +
> +	/* If user has specified --runas, that takes precedence */
> +	if (runas) {
> +		ret = conf_runas(runas, uid, gid);
> +		if (ret)
> +			err("Invalid --runas option: %s", runas);
> +		return ret;
> +	}
> +
> +	/* Otherwise default to current user and group.. */
> +	*uid = geteuid();
> +	*gid = getegid();
> +
> +	/* ..as long as it's not root.. */
> +	if (*uid)
> +		return 0;
> +
> +	/* ..or at least not root in the init namespace.. */
> +	if ((fd = open("/proc/self/uid_map", O_RDONLY | O_CLOEXEC)) < 0)
> +		return 0;
> +
> +	if (read(fd, buf, BUFSIZ) != sizeof(root_uid_map) ||
> +	    strncmp(buf, root_uid_map, sizeof(root_uid_map) - 1)) {
> +		close(fd);
> +		return 0;
> +	}
> +
> +	close(fd);
> +
> +	/* ..otherwise use nobody:nobody */

I'd change all those comment to use ellipses (...) instead of "..".

I think your comments here help a lot, by the way (and I couldn't find
a better way to check for UID 0 in non-init other than that hack).

-- 
Stefano


^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH v2 04/10] Safer handling if we can't open /proc/self/uid_map
  2022-09-08  3:59 ` [PATCH v2 04/10] Safer handling if we can't open /proc/self/uid_map David Gibson
@ 2022-09-09 14:33   ` Stefano Brivio
  2022-09-10  7:23     ` David Gibson
  0 siblings, 1 reply; 33+ messages in thread
From: Stefano Brivio @ 2022-09-09 14:33 UTC (permalink / raw)
  To: passt-dev

[-- Attachment #1: Type: text/plain, Size: 1068 bytes --]

On Thu,  8 Sep 2022 13:59:01 +1000
David Gibson <david(a)gibson.dropbear.id.au> wrote:

> passt is allowed to run as "root" (UID 0) in a user namespace, but notas
> real root in the init namespace.  We read /proc/self/uid_map to determine
> if we're in the init namespace or not.
> 
> If we're unable to open /proc/self/uid_map we assume we're ok and continue
> running as UID 0.  This seems unwise: AFAIK the only instance in which
> uid_map won't be available is if we're running on a kernel which doesn't
> support user namespaces, in which case we won't be able to sandbox
> ourselves as we want and fail anyway.

Well, if user namespaces are not supported and the UID is 0, then we're
actually running as root, so we should quit anyway.

> If there are other circumstances
> where it can't be opened it seems marginally more likely that we *are*
> in the init namespace.

That could also happen if procfs is not mounted, but I'm not sure what
would work then.

> Therefore, fail with an error in this case, instead of carrying on.

Yes, absolutely.

-- 
Stefano


^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH v2 05/10] Move self-isolation code into a separate file
  2022-09-08  3:59 ` [PATCH v2 05/10] Move self-isolation code into a separate file David Gibson
@ 2022-09-09 14:33   ` Stefano Brivio
  2022-09-10  7:23     ` David Gibson
  0 siblings, 1 reply; 33+ messages in thread
From: Stefano Brivio @ 2022-09-09 14:33 UTC (permalink / raw)
  To: passt-dev

[-- Attachment #1: Type: text/plain, Size: 397 bytes --]

On Thu,  8 Sep 2022 13:59:02 +1000
David Gibson <david(a)gibson.dropbear.id.au> wrote:

> [...]
>
> +++ b/isolation.c
>
> [...]
>
> +/**
> + * sandbox() - Unshare IPC, mount, PID, UTS, and user namespaces, "unmount" root
> + *
> + * Return: negative error code on failure, zero on success
> + */
> +int sandbox(struct ctx *c)

Same here, I would "document" "c".

-- 
Stefano


^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH v2 08/10] Correctly handle --netns-only in pasta_start_ns()
  2022-09-08  3:59 ` [PATCH v2 08/10] Correctly handle --netns-only in pasta_start_ns() David Gibson
@ 2022-09-09 14:34   ` Stefano Brivio
  2022-09-10  7:25     ` David Gibson
  0 siblings, 1 reply; 33+ messages in thread
From: Stefano Brivio @ 2022-09-09 14:34 UTC (permalink / raw)
  To: passt-dev

[-- Attachment #1: Type: text/plain, Size: 1258 bytes --]

On Thu,  8 Sep 2022 13:59:05 +1000
David Gibson <david(a)gibson.dropbear.id.au> wrote:

> --netns-only is supposed to make pasta use only a network namespace, not
> a user namespace.  However, pasta_start_ns() has this backwards, and if
> --netns-only is specified it creates a user namespace but *not* a network
> namespace.  Correct this.
> 
> Signed-off-by: David Gibson <david(a)gibson.dropbear.id.au>
> ---
>  pasta.c | 4 ++--
>  1 file changed, 2 insertions(+), 2 deletions(-)
> 
> diff --git a/pasta.c b/pasta.c
> index 0fd45e4..7eac8e9 100644
> --- a/pasta.c
> +++ b/pasta.c
> @@ -244,8 +244,8 @@ void pasta_start_ns(struct ctx *c, int argc, char *argv[])
>  
>  	pasta_child_pid = clone(pasta_setup_ns,
>  				ns_fn_stack + sizeof(ns_fn_stack) / 2,
> -				(c->netns_only ? 0 : CLONE_NEWNET) |
> -				CLONE_NEWIPC | CLONE_NEWPID | CLONE_NEWUSER |
> +				(c->netns_only ? 0 : CLONE_NEWUSER) |
> +				CLONE_NEWIPC | CLONE_NEWPID | CLONE_NEWNET |

Oh, funny, so it never worked in this case.

I thought anyway your plan was to drop --netns-only altogether, and if
a --netns option is specified without --userns, then it's implied. Is
that still on the table (outside the scope of this series I presume)?

-- 
Stefano


^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH v2 10/10] Allow --userns when pasta spawns a command
  2022-09-08  3:59 ` [PATCH v2 10/10] Allow --userns when pasta spawns a command David Gibson
@ 2022-09-09 14:34   ` Stefano Brivio
  2022-09-10  7:29     ` David Gibson
  0 siblings, 1 reply; 33+ messages in thread
From: Stefano Brivio @ 2022-09-09 14:34 UTC (permalink / raw)
  To: passt-dev

[-- Attachment #1: Type: text/plain, Size: 1494 bytes --]

On Thu,  8 Sep 2022 13:59:07 +1000
David Gibson <david(a)gibson.dropbear.id.au> wrote:

> Currently --userns is only allowed when pasta is attaching to an existing
> netns or PID, and is prohibited when creating a new netns by spawning a
> command or shell.
> 
> With the new handling of userns, this check isn't neccessary.  I'm not sure
> if there's any use case for --userns with a spawned command, but it's
> strictly more flexible and requires zero extra code, so we might as well.

I think it's helpful because one might not be able to join a network
namespace without first joining a given user namespace.

So, if you want to run any network-ish command in such a network
namespace, using pasta instead of nsenter for whatever reason, this
possibility might be practical.

> Signed-off-by: David Gibson <david(a)gibson.dropbear.id.au>
> ---
>  conf.c | 5 -----
>  1 file changed, 5 deletions(-)
> 
> diff --git a/conf.c b/conf.c
> index 27d520e..ec191c2 100644
> --- a/conf.c
> +++ b/conf.c
> @@ -561,11 +561,6 @@ static int conf_pasta_ns(int *netns_only, char *userns, char *netns,
>  		}
>  	}
>  
> -	if (*userns && !*netns) {
> -		err("--userns requires --netns or PID");
> -		return -EINVAL;
> -	}

I guess we should now drop this sentence about --userns from the man
page:

  This option requires --netns or a PID to be specified.

...either drop it, or clarify that a command might also be given
instead, I'm not sure.

-- 
Stefano


^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH v2 00/10] Clean up handling of userns
  2022-09-08  3:58 [PATCH v2 00/10] Clean up handling of userns David Gibson
                   ` (9 preceding siblings ...)
  2022-09-08  3:59 ` [PATCH v2 10/10] Allow --userns when pasta spawns a command David Gibson
@ 2022-09-09 14:36 ` Stefano Brivio
  2022-09-10  7:30   ` David Gibson
  10 siblings, 1 reply; 33+ messages in thread
From: Stefano Brivio @ 2022-09-09 14:36 UTC (permalink / raw)
  To: passt-dev

[-- Attachment #1: Type: text/plain, Size: 1033 bytes --]

On Thu,  8 Sep 2022 13:58:57 +1000
David Gibson <david(a)gibson.dropbear.id.au> wrote:

> Sorry for the resend, but I found a bug that means this will fail to
> build on some distros / versions.
> 
> Our handling of user namespaces is more complex than it needs to be.
> This simplifies the handling by identifying and entering (or creating)
> the correct userns earlier, so that later code doesn't need to deal
> with it any more.
> 
> Along the way we make a number of other cleanups to handling of userns
> and setting our user and group.
> 
> This is based on my earlier test command dispatch and performance test
> cleanup series.
> 
> Changes since v1:
>  * Fixed overenthusiastic pruning of #includes when moving the
>    self-isolation code which broke compile on some distro versions

I haven't tested this yet, but it looks great.

I can fix up those small things I reported directly while merging, if
you agree, and apply.

A suggestion on how to update the man page as a result of 10/10 would
help, though.

-- 
Stefano


^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH v2 02/10] Split checking for root from dropping root privilege
  2022-09-09 14:33   ` Stefano Brivio
@ 2022-09-10  7:09     ` David Gibson
  0 siblings, 0 replies; 33+ messages in thread
From: David Gibson @ 2022-09-10  7:09 UTC (permalink / raw)
  To: passt-dev

[-- Attachment #1: Type: text/plain, Size: 3231 bytes --]

On Fri, Sep 09, 2022 at 04:33:27PM +0200, Stefano Brivio wrote:
> Just some ridiculous nitpicking on this one:
> 
> On Thu,  8 Sep 2022 13:58:59 +1000
> David Gibson <david(a)gibson.dropbear.id.au> wrote:
> 
> > [...]
> >
> > +++ b/passt.1
> > @@ -104,9 +104,10 @@ terminal, and to both system logger and standard error otherwise.
> >  
> >  .TP
> >  .BR \-\-runas " " \fIUID\fR|\fIUID:GID\fR|\fILOGIN\fR|\fILOGIN:GROUP\fR
> > -If started as root, change to given UID and corresponding group if UID is given,
> > +Attempt to change to given UID and corresponding group if UID is given,
> >  or to given UID and given GID if both are given. Alternatively, login name, or
> > -login name and group name can be passed.
> > +login name and group name can be passed.  This requires privilege (either
> 
> I'd change this to "privileges", right?

Hmm.. I think either would work, but I'll change it.


> Also, adding two spaces following a period, as you do, seems to have
> some merits:
> 
>   https://link.springer.com/article/10.3758/s13414-018-1527-6
> 
>   Johnson, R.L., Bui, B. & Schmitt, L.L. Are two spaces better than
>   one? The effect of spacing following periods and commas during reading.
>   Atten Percept Psychophys 80, 1504–1511 (2018)
> 
> ...but in man pages, nroff might turn that into three or more spaces,
> inconsistently, in a justified paragraph.
> 
> I'd stick to one. Or change all of them (I almost never use two, so
> here it's all single spaces).

Fair enough.  I believe the question of spaces after a full stop
(British/Australian English for "period") is a bit of a contentious
issue.  At least in the English speaking world - I gather it's rarely
used outside that.  It's mostly irrelevant for modern typesetting,
because the typesetter will adjust anyways, I think I developed the
habit from writing text documents, at least some in the age of
monospaced printing.

I've removed it, anyway.

> > +initial effective UID 0 or CAP_SETUID capability) to work.
> >  Default is to change to user \fInobody\fR if started as root.
> >  
> >  .TP
> > diff --git a/util.c b/util.c
> > index b2ccb3d..17595c1 100644
> > --- a/util.c
> > +++ b/util.c
> > @@ -492,7 +492,13 @@ void check_root(uid_t *uid, gid_t *gid)
> >  	char buf[BUFSIZ];
> >  	int fd;
> >  
> > -	if (getuid() && geteuid())
> > +	if (!*uid)
> > +		*uid = geteuid();
> > +
> > +	if (!*gid)
> > +		*gid = getegid();
> > +
> > +	if (*uid)
> >  		return;
> >  
> >  	if ((fd = open("/proc/self/uid_map", O_RDONLY | O_CLOEXEC)) < 0)
> > @@ -524,11 +530,26 @@ void check_root(uid_t *uid, gid_t *gid)
> >  		*uid = *gid = 65534;
> >  #endif
> >  	}
> > +}
> > +
> > +/**
> > + * drop_root() - Switch to given UID and GID
> 
> I would add the usual:
> 
>  * @uid:	User ID to switch to
>  * @gid:	Group ID to switch to
> 
> even though it's totally obvious here, in case somebody should ever need
> to parse this stuff to produce documentation.

Good point, added.

-- 
David Gibson			| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you.  NOT _the_ _other_
				| _way_ _around_!
http://www.ozlabs.org/~dgibson

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH v2 03/10] Consolidate determination of UID/GID to run as
  2022-09-09 14:33   ` Stefano Brivio
@ 2022-09-10  7:15     ` David Gibson
  2022-09-10 20:43       ` Stefano Brivio
  0 siblings, 1 reply; 33+ messages in thread
From: David Gibson @ 2022-09-10  7:15 UTC (permalink / raw)
  To: passt-dev

[-- Attachment #1: Type: text/plain, Size: 3154 bytes --]

On Fri, Sep 09, 2022 at 04:33:47PM +0200, Stefano Brivio wrote:
> Also mere nitpicking on this one:
> 
> On Thu,  8 Sep 2022 13:59:00 +1000
> David Gibson <david(a)gibson.dropbear.id.au> wrote:
> 
> > Currently the logic to work out what UID and GID we will run as is spread
> > across conf().  If --runas is specified it's handled in conf_runas(),
> > otherwise it's handled by check_root(), which depends on initialization of
> > the uid and gid variables by either conf() itself or conf_runas().
> > 
> > Make this clearer by putting all the UID and GID logic into a single
> > conf_ugid() function.
> > 
> > Signed-off-by: David Gibson <david(a)gibson.dropbear.id.au>
> > ---
> >  conf.c | 81 ++++++++++++++++++++++++++++++++++++++++++++++++++++------
> >  util.c | 50 ------------------------------------
> >  util.h |  1 -
> >  3 files changed, 73 insertions(+), 59 deletions(-)
> > 
> > diff --git a/conf.c b/conf.c
> > index 545f61d..5c293b5 100644
> > --- a/conf.c
> > +++ b/conf.c
> > @@ -1021,6 +1021,70 @@ static int conf_runas(const char *opt, unsigned int *uid, unsigned int *gid)
> >  #endif /* !GLIBC_NO_STATIC_NSS */
> >  }
> >  
> > +/**
> > + * conf_ugid() - Determine UID and GID to run as
> > + * @runas:	--runas option, may be NULL
> > + * @uid:	User ID, set on success
> > + * @gid:	Group ID, set on success
> > + *
> > + * Return: 0 on success, negative error code on failure
> > + */
> > +static int conf_ugid(const char *runas, uid_t *uid, gid_t *gid)
> > +{
> > +	const char root_uid_map[] = "         0          0 4294967295";
> > +	struct passwd *pw;
> > +	char buf[BUFSIZ];
> > +	int ret;
> > +	int fd;
> > +
> > +	/* If user has specified --runas, that takes precedence */
> > +	if (runas) {
> > +		ret = conf_runas(runas, uid, gid);
> > +		if (ret)
> > +			err("Invalid --runas option: %s", runas);
> > +		return ret;
> > +	}
> > +
> > +	/* Otherwise default to current user and group.. */
> > +	*uid = geteuid();
> > +	*gid = getegid();
> > +
> > +	/* ..as long as it's not root.. */
> > +	if (*uid)
> > +		return 0;
> > +
> > +	/* ..or at least not root in the init namespace.. */
> > +	if ((fd = open("/proc/self/uid_map", O_RDONLY | O_CLOEXEC)) < 0)
> > +		return 0;
> > +
> > +	if (read(fd, buf, BUFSIZ) != sizeof(root_uid_map) ||
> > +	    strncmp(buf, root_uid_map, sizeof(root_uid_map) - 1)) {
> > +		close(fd);
> > +		return 0;
> > +	}
> > +
> > +	close(fd);
> > +
> > +	/* ..otherwise use nobody:nobody */
> 
> I'd change all those comment to use ellipses (...) instead of "..".

Ok, nit picked.

> I think your comments here help a lot, by the way (and I couldn't find
> a better way to check for UID 0 in non-init other than that hack).

Right.  The extra complexity - both of code and of mental model - in
going from "not UID 0" to "not UID 0 in the init namespace" is what
makes me really wonder if this check is worth having.

-- 
David Gibson			| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you.  NOT _the_ _other_
				| _way_ _around_!
http://www.ozlabs.org/~dgibson

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH v2 04/10] Safer handling if we can't open /proc/self/uid_map
  2022-09-09 14:33   ` Stefano Brivio
@ 2022-09-10  7:23     ` David Gibson
  0 siblings, 0 replies; 33+ messages in thread
From: David Gibson @ 2022-09-10  7:23 UTC (permalink / raw)
  To: passt-dev

[-- Attachment #1: Type: text/plain, Size: 1577 bytes --]

On Fri, Sep 09, 2022 at 04:33:52PM +0200, Stefano Brivio wrote:
> On Thu,  8 Sep 2022 13:59:01 +1000
> David Gibson <david(a)gibson.dropbear.id.au> wrote:
> 
> > passt is allowed to run as "root" (UID 0) in a user namespace, but notas
> > real root in the init namespace.  We read /proc/self/uid_map to determine
> > if we're in the init namespace or not.
> > 
> > If we're unable to open /proc/self/uid_map we assume we're ok and continue
> > running as UID 0.  This seems unwise: AFAIK the only instance in which
> > uid_map won't be available is if we're running on a kernel which doesn't
> > support user namespaces, in which case we won't be able to sandbox
> > ourselves as we want and fail anyway.
> 
> Well, if user namespaces are not supported and the UID is 0, then we're
> actually running as root, so we should quit anyway.

Right, that's kind of the whole point of this patch, but it's a bit
obscured in the wording here because I realized we'd actually fail
later anyway.

> > If there are other circumstances
> > where it can't be opened it seems marginally more likely that we *are*
> > in the init namespace.
> 
> That could also happen if procfs is not mounted, but I'm not sure what
> would work then.

True.  I'll reword the commit message to make both points clearer.

> > Therefore, fail with an error in this case, instead of carrying on.
> 
> Yes, absolutely.
> 

-- 
David Gibson			| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you.  NOT _the_ _other_
				| _way_ _around_!
http://www.ozlabs.org/~dgibson

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH v2 05/10] Move self-isolation code into a separate file
  2022-09-09 14:33   ` Stefano Brivio
@ 2022-09-10  7:23     ` David Gibson
  2022-09-10 20:43       ` Stefano Brivio
  0 siblings, 1 reply; 33+ messages in thread
From: David Gibson @ 2022-09-10  7:23 UTC (permalink / raw)
  To: passt-dev

[-- Attachment #1: Type: text/plain, Size: 737 bytes --]

On Fri, Sep 09, 2022 at 04:33:58PM +0200, Stefano Brivio wrote:
> On Thu,  8 Sep 2022 13:59:02 +1000
> David Gibson <david(a)gibson.dropbear.id.au> wrote:
> 
> > [...]
> >
> > +++ b/isolation.c
> >
> > [...]
> >
> > +/**
> > + * sandbox() - Unshare IPC, mount, PID, UTS, and user namespaces, "unmount" root
> > + *
> > + * Return: negative error code on failure, zero on success
> > + */
> > +int sandbox(struct ctx *c)
> 
> Same here, I would "document" "c".

Not in scope for this patch, since it's a pure code motion.

-- 
David Gibson			| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you.  NOT _the_ _other_
				| _way_ _around_!
http://www.ozlabs.org/~dgibson

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH v2 08/10] Correctly handle --netns-only in pasta_start_ns()
  2022-09-09 14:34   ` Stefano Brivio
@ 2022-09-10  7:25     ` David Gibson
  2022-09-11  8:26       ` David Gibson
  0 siblings, 1 reply; 33+ messages in thread
From: David Gibson @ 2022-09-10  7:25 UTC (permalink / raw)
  To: passt-dev

[-- Attachment #1: Type: text/plain, Size: 1765 bytes --]

On Fri, Sep 09, 2022 at 04:34:20PM +0200, Stefano Brivio wrote:
> On Thu,  8 Sep 2022 13:59:05 +1000
> David Gibson <david(a)gibson.dropbear.id.au> wrote:
> 
> > --netns-only is supposed to make pasta use only a network namespace, not
> > a user namespace.  However, pasta_start_ns() has this backwards, and if
> > --netns-only is specified it creates a user namespace but *not* a network
> > namespace.  Correct this.
> > 
> > Signed-off-by: David Gibson <david(a)gibson.dropbear.id.au>
> > ---
> >  pasta.c | 4 ++--
> >  1 file changed, 2 insertions(+), 2 deletions(-)
> > 
> > diff --git a/pasta.c b/pasta.c
> > index 0fd45e4..7eac8e9 100644
> > --- a/pasta.c
> > +++ b/pasta.c
> > @@ -244,8 +244,8 @@ void pasta_start_ns(struct ctx *c, int argc, char *argv[])
> >  
> >  	pasta_child_pid = clone(pasta_setup_ns,
> >  				ns_fn_stack + sizeof(ns_fn_stack) / 2,
> > -				(c->netns_only ? 0 : CLONE_NEWNET) |
> > -				CLONE_NEWIPC | CLONE_NEWPID | CLONE_NEWUSER |
> > +				(c->netns_only ? 0 : CLONE_NEWUSER) |
> > +				CLONE_NEWIPC | CLONE_NEWPID | CLONE_NEWNET |
> 
> Oh, funny, so it never worked in this case.
> 
> I thought anyway your plan was to drop --netns-only altogether, and if
> a --netns option is specified without --userns, then it's implied. Is
> that still on the table (outside the scope of this series I presume)?

Well.. having reduced the lifetome of netns_only to within conf()
alone, I felt a lot less urgency about removing it entirely.  It might
still be nicer to remove it anyway; I'll have another look.

-- 
David Gibson			| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you.  NOT _the_ _other_
				| _way_ _around_!
http://www.ozlabs.org/~dgibson

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH v2 10/10] Allow --userns when pasta spawns a command
  2022-09-09 14:34   ` Stefano Brivio
@ 2022-09-10  7:29     ` David Gibson
  2022-09-10 20:42       ` Stefano Brivio
  0 siblings, 1 reply; 33+ messages in thread
From: David Gibson @ 2022-09-10  7:29 UTC (permalink / raw)
  To: passt-dev

[-- Attachment #1: Type: text/plain, Size: 1966 bytes --]

On Fri, Sep 09, 2022 at 04:34:25PM +0200, Stefano Brivio wrote:
> On Thu,  8 Sep 2022 13:59:07 +1000
> David Gibson <david(a)gibson.dropbear.id.au> wrote:
> 
> > Currently --userns is only allowed when pasta is attaching to an existing
> > netns or PID, and is prohibited when creating a new netns by spawning a
> > command or shell.
> > 
> > With the new handling of userns, this check isn't neccessary.  I'm not sure
> > if there's any use case for --userns with a spawned command, but it's
> > strictly more flexible and requires zero extra code, so we might as well.
> 
> I think it's helpful because one might not be able to join a network
> namespace without first joining a given user namespace.

Well.. this is strictly for the spawning command case, so we're
creating the network ns rather than joining one.

> So, if you want to run any network-ish command in such a network
> namespace, using pasta instead of nsenter for whatever reason, this
> possibility might be practical.
> 
> > Signed-off-by: David Gibson <david(a)gibson.dropbear.id.au>
> > ---
> >  conf.c | 5 -----
> >  1 file changed, 5 deletions(-)
> > 
> > diff --git a/conf.c b/conf.c
> > index 27d520e..ec191c2 100644
> > --- a/conf.c
> > +++ b/conf.c
> > @@ -561,11 +561,6 @@ static int conf_pasta_ns(int *netns_only, char *userns, char *netns,
> >  		}
> >  	}
> >  
> > -	if (*userns && !*netns) {
> > -		err("--userns requires --netns or PID");
> > -		return -EINVAL;
> > -	}
> 
> I guess we should now drop this sentence about --userns from the man
> page:
> 
>   This option requires --netns or a PID to be specified.
> 
> ...either drop it, or clarify that a command might also be given
> instead, I'm not sure.

Good point, I'll adjust.

-- 
David Gibson			| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you.  NOT _the_ _other_
				| _way_ _around_!
http://www.ozlabs.org/~dgibson

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH v2 00/10] Clean up handling of userns
  2022-09-09 14:36 ` [PATCH v2 00/10] Clean up handling of userns Stefano Brivio
@ 2022-09-10  7:30   ` David Gibson
  0 siblings, 0 replies; 33+ messages in thread
From: David Gibson @ 2022-09-10  7:30 UTC (permalink / raw)
  To: passt-dev

[-- Attachment #1: Type: text/plain, Size: 1391 bytes --]

On Fri, Sep 09, 2022 at 04:36:21PM +0200, Stefano Brivio wrote:
> On Thu,  8 Sep 2022 13:58:57 +1000
> David Gibson <david(a)gibson.dropbear.id.au> wrote:
> 
> > Sorry for the resend, but I found a bug that means this will fail to
> > build on some distros / versions.
> > 
> > Our handling of user namespaces is more complex than it needs to be.
> > This simplifies the handling by identifying and entering (or creating)
> > the correct userns earlier, so that later code doesn't need to deal
> > with it any more.
> > 
> > Along the way we make a number of other cleanups to handling of userns
> > and setting our user and group.
> > 
> > This is based on my earlier test command dispatch and performance test
> > cleanup series.
> > 
> > Changes since v1:
> >  * Fixed overenthusiastic pruning of #includes when moving the
> >    self-isolation code which broke compile on some distro versions
> 
> I haven't tested this yet, but it looks great.
> 
> I can fix up those small things I reported directly while merging, if
> you agree, and apply.
> 
> A suggestion on how to update the man page as a result of 10/10 would
> help, though.

I'll respin next week with that adjusted amongst other nits.

-- 
David Gibson			| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you.  NOT _the_ _other_
				| _way_ _around_!
http://www.ozlabs.org/~dgibson

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH v2 10/10] Allow --userns when pasta spawns a command
  2022-09-10  7:29     ` David Gibson
@ 2022-09-10 20:42       ` Stefano Brivio
  0 siblings, 0 replies; 33+ messages in thread
From: Stefano Brivio @ 2022-09-10 20:42 UTC (permalink / raw)
  To: passt-dev

[-- Attachment #1: Type: text/plain, Size: 1079 bytes --]

On Sat, 10 Sep 2022 17:29:35 +1000
David Gibson <david(a)gibson.dropbear.id.au> wrote:

> On Fri, Sep 09, 2022 at 04:34:25PM +0200, Stefano Brivio wrote:
> > On Thu,  8 Sep 2022 13:59:07 +1000
> > David Gibson <david(a)gibson.dropbear.id.au> wrote:
> >   
> > > Currently --userns is only allowed when pasta is attaching to an existing
> > > netns or PID, and is prohibited when creating a new netns by spawning a
> > > command or shell.
> > > 
> > > With the new handling of userns, this check isn't neccessary.  I'm not sure
> > > if there's any use case for --userns with a spawned command, but it's
> > > strictly more flexible and requires zero extra code, so we might as well.  
> > 
> > I think it's helpful because one might not be able to join a network
> > namespace without first joining a given user namespace.  
> 
> Well.. this is strictly for the spawning command case, so we're
> creating the network ns rather than joining one.

Ah, you're right. Then I'm also not sure. But yes, it's negative lines
of code, so why not.

-- 
Stefano


^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH v2 05/10] Move self-isolation code into a separate file
  2022-09-10  7:23     ` David Gibson
@ 2022-09-10 20:43       ` Stefano Brivio
  0 siblings, 0 replies; 33+ messages in thread
From: Stefano Brivio @ 2022-09-10 20:43 UTC (permalink / raw)
  To: passt-dev

[-- Attachment #1: Type: text/plain, Size: 732 bytes --]

On Sat, 10 Sep 2022 17:23:56 +1000
David Gibson <david(a)gibson.dropbear.id.au> wrote:

> On Fri, Sep 09, 2022 at 04:33:58PM +0200, Stefano Brivio wrote:
> > On Thu,  8 Sep 2022 13:59:02 +1000
> > David Gibson <david(a)gibson.dropbear.id.au> wrote:
> >   
> > > [...]
> > >
> > > +++ b/isolation.c
> > >
> > > [...]
> > >
> > > +/**
> > > + * sandbox() - Unshare IPC, mount, PID, UTS, and user namespaces, "unmount" root
> > > + *
> > > + * Return: negative error code on failure, zero on success
> > > + */
> > > +int sandbox(struct ctx *c)  
> > 
> > Same here, I would "document" "c".  
> 
> Not in scope for this patch, since it's a pure code motion.

Whoops, my bad, I didn't notice.

-- 
Stefano


^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH v2 03/10] Consolidate determination of UID/GID to run as
  2022-09-10  7:15     ` David Gibson
@ 2022-09-10 20:43       ` Stefano Brivio
  2022-09-12  9:53         ` David Gibson
  0 siblings, 1 reply; 33+ messages in thread
From: Stefano Brivio @ 2022-09-10 20:43 UTC (permalink / raw)
  To: passt-dev

[-- Attachment #1: Type: text/plain, Size: 3624 bytes --]

On Sat, 10 Sep 2022 17:15:41 +1000
David Gibson <david(a)gibson.dropbear.id.au> wrote:

> On Fri, Sep 09, 2022 at 04:33:47PM +0200, Stefano Brivio wrote:
> > Also mere nitpicking on this one:
> > 
> > On Thu,  8 Sep 2022 13:59:00 +1000
> > David Gibson <david(a)gibson.dropbear.id.au> wrote:
> >   
> > > Currently the logic to work out what UID and GID we will run as is spread
> > > across conf().  If --runas is specified it's handled in conf_runas(),
> > > otherwise it's handled by check_root(), which depends on initialization of
> > > the uid and gid variables by either conf() itself or conf_runas().
> > > 
> > > Make this clearer by putting all the UID and GID logic into a single
> > > conf_ugid() function.
> > > 
> > > Signed-off-by: David Gibson <david(a)gibson.dropbear.id.au>
> > > ---
> > >  conf.c | 81 ++++++++++++++++++++++++++++++++++++++++++++++++++++------
> > >  util.c | 50 ------------------------------------
> > >  util.h |  1 -
> > >  3 files changed, 73 insertions(+), 59 deletions(-)
> > > 
> > > diff --git a/conf.c b/conf.c
> > > index 545f61d..5c293b5 100644
> > > --- a/conf.c
> > > +++ b/conf.c
> > > @@ -1021,6 +1021,70 @@ static int conf_runas(const char *opt, unsigned int *uid, unsigned int *gid)
> > >  #endif /* !GLIBC_NO_STATIC_NSS */
> > >  }
> > >  
> > > +/**
> > > + * conf_ugid() - Determine UID and GID to run as
> > > + * @runas:	--runas option, may be NULL
> > > + * @uid:	User ID, set on success
> > > + * @gid:	Group ID, set on success
> > > + *
> > > + * Return: 0 on success, negative error code on failure
> > > + */
> > > +static int conf_ugid(const char *runas, uid_t *uid, gid_t *gid)
> > > +{
> > > +	const char root_uid_map[] = "         0          0 4294967295";
> > > +	struct passwd *pw;
> > > +	char buf[BUFSIZ];
> > > +	int ret;
> > > +	int fd;
> > > +
> > > +	/* If user has specified --runas, that takes precedence */
> > > +	if (runas) {
> > > +		ret = conf_runas(runas, uid, gid);
> > > +		if (ret)
> > > +			err("Invalid --runas option: %s", runas);
> > > +		return ret;
> > > +	}
> > > +
> > > +	/* Otherwise default to current user and group.. */
> > > +	*uid = geteuid();
> > > +	*gid = getegid();
> > > +
> > > +	/* ..as long as it's not root.. */
> > > +	if (*uid)
> > > +		return 0;
> > > +
> > > +	/* ..or at least not root in the init namespace.. */
> > > +	if ((fd = open("/proc/self/uid_map", O_RDONLY | O_CLOEXEC)) < 0)
> > > +		return 0;
> > > +
> > > +	if (read(fd, buf, BUFSIZ) != sizeof(root_uid_map) ||
> > > +	    strncmp(buf, root_uid_map, sizeof(root_uid_map) - 1)) {
> > > +		close(fd);
> > > +		return 0;
> > > +	}
> > > +
> > > +	close(fd);
> > > +
> > > +	/* ..otherwise use nobody:nobody */  
> > 
> > I'd change all those comment to use ellipses (...) instead of "..".  
> 
> Ok, nit picked.
> 
> > I think your comments here help a lot, by the way (and I couldn't find
> > a better way to check for UID 0 in non-init other than that hack).  
> 
> Right.  The extra complexity - both of code and of mental model - in
> going from "not UID 0" to "not UID 0 in the init namespace" is what
> makes me really wonder if this check is worth having.

I think it's desirable for two cases (rather important in my opinion):

- running passt with a further isolation implemented by a user
  namespace (e.g. with pasta). There it's not really practical to use a
  non-zero UID, and allowing to do this easily is a relevant security
  feature

- ...if I recall correctly (but I can't check right now) Podman does
  the same

-- 
Stefano


^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH v2 08/10] Correctly handle --netns-only in pasta_start_ns()
  2022-09-10  7:25     ` David Gibson
@ 2022-09-11  8:26       ` David Gibson
  2022-09-13  3:50         ` Stefano Brivio
  0 siblings, 1 reply; 33+ messages in thread
From: David Gibson @ 2022-09-11  8:26 UTC (permalink / raw)
  To: passt-dev

[-- Attachment #1: Type: text/plain, Size: 2082 bytes --]

On Sat, Sep 10, 2022 at 05:25:36PM +1000, David Gibson wrote:
> On Fri, Sep 09, 2022 at 04:34:20PM +0200, Stefano Brivio wrote:
> > On Thu,  8 Sep 2022 13:59:05 +1000
> > David Gibson <david(a)gibson.dropbear.id.au> wrote:
> > 
> > > --netns-only is supposed to make pasta use only a network namespace, not
> > > a user namespace.  However, pasta_start_ns() has this backwards, and if
> > > --netns-only is specified it creates a user namespace but *not* a network
> > > namespace.  Correct this.
> > > 
> > > Signed-off-by: David Gibson <david(a)gibson.dropbear.id.au>
> > > ---
> > >  pasta.c | 4 ++--
> > >  1 file changed, 2 insertions(+), 2 deletions(-)
> > > 
> > > diff --git a/pasta.c b/pasta.c
> > > index 0fd45e4..7eac8e9 100644
> > > --- a/pasta.c
> > > +++ b/pasta.c
> > > @@ -244,8 +244,8 @@ void pasta_start_ns(struct ctx *c, int argc, char *argv[])
> > >  
> > >  	pasta_child_pid = clone(pasta_setup_ns,
> > >  				ns_fn_stack + sizeof(ns_fn_stack) / 2,
> > > -				(c->netns_only ? 0 : CLONE_NEWNET) |
> > > -				CLONE_NEWIPC | CLONE_NEWPID | CLONE_NEWUSER |
> > > +				(c->netns_only ? 0 : CLONE_NEWUSER) |
> > > +				CLONE_NEWIPC | CLONE_NEWPID | CLONE_NEWNET |
> > 
> > Oh, funny, so it never worked in this case.
> > 
> > I thought anyway your plan was to drop --netns-only altogether, and if
> > a --netns option is specified without --userns, then it's implied. Is
> > that still on the table (outside the scope of this series I presume)?
> 
> Well.. having reduced the lifetome of netns_only to within conf()
> alone, I felt a lot less urgency about removing it entirely.  It might
> still be nicer to remove it anyway; I'll have another look.

Actually, I realized there is a reason to leave it in: just using
--netns doesn't work with spawning a command, and spawning a command
with netns but no userns might be useful.

-- 
David Gibson			| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you.  NOT _the_ _other_
				| _way_ _around_!
http://www.ozlabs.org/~dgibson

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH v2 03/10] Consolidate determination of UID/GID to run as
  2022-09-10 20:43       ` Stefano Brivio
@ 2022-09-12  9:53         ` David Gibson
  2022-09-13  3:49           ` Stefano Brivio
  0 siblings, 1 reply; 33+ messages in thread
From: David Gibson @ 2022-09-12  9:53 UTC (permalink / raw)
  To: passt-dev

[-- Attachment #1: Type: text/plain, Size: 4496 bytes --]

On Sat, Sep 10, 2022 at 10:43:31PM +0200, Stefano Brivio wrote:
> On Sat, 10 Sep 2022 17:15:41 +1000
> David Gibson <david(a)gibson.dropbear.id.au> wrote:
> 
> > On Fri, Sep 09, 2022 at 04:33:47PM +0200, Stefano Brivio wrote:
> > > Also mere nitpicking on this one:
> > > 
> > > On Thu,  8 Sep 2022 13:59:00 +1000
> > > David Gibson <david(a)gibson.dropbear.id.au> wrote:
> > >   
> > > > Currently the logic to work out what UID and GID we will run as is spread
> > > > across conf().  If --runas is specified it's handled in conf_runas(),
> > > > otherwise it's handled by check_root(), which depends on initialization of
> > > > the uid and gid variables by either conf() itself or conf_runas().
> > > > 
> > > > Make this clearer by putting all the UID and GID logic into a single
> > > > conf_ugid() function.
> > > > 
> > > > Signed-off-by: David Gibson <david(a)gibson.dropbear.id.au>
> > > > ---
> > > >  conf.c | 81 ++++++++++++++++++++++++++++++++++++++++++++++++++++------
> > > >  util.c | 50 ------------------------------------
> > > >  util.h |  1 -
> > > >  3 files changed, 73 insertions(+), 59 deletions(-)
> > > > 
> > > > diff --git a/conf.c b/conf.c
> > > > index 545f61d..5c293b5 100644
> > > > --- a/conf.c
> > > > +++ b/conf.c
> > > > @@ -1021,6 +1021,70 @@ static int conf_runas(const char *opt, unsigned int *uid, unsigned int *gid)
> > > >  #endif /* !GLIBC_NO_STATIC_NSS */
> > > >  }
> > > >  
> > > > +/**
> > > > + * conf_ugid() - Determine UID and GID to run as
> > > > + * @runas:	--runas option, may be NULL
> > > > + * @uid:	User ID, set on success
> > > > + * @gid:	Group ID, set on success
> > > > + *
> > > > + * Return: 0 on success, negative error code on failure
> > > > + */
> > > > +static int conf_ugid(const char *runas, uid_t *uid, gid_t *gid)
> > > > +{
> > > > +	const char root_uid_map[] = "         0          0 4294967295";
> > > > +	struct passwd *pw;
> > > > +	char buf[BUFSIZ];
> > > > +	int ret;
> > > > +	int fd;
> > > > +
> > > > +	/* If user has specified --runas, that takes precedence */
> > > > +	if (runas) {
> > > > +		ret = conf_runas(runas, uid, gid);
> > > > +		if (ret)
> > > > +			err("Invalid --runas option: %s", runas);
> > > > +		return ret;
> > > > +	}
> > > > +
> > > > +	/* Otherwise default to current user and group.. */
> > > > +	*uid = geteuid();
> > > > +	*gid = getegid();
> > > > +
> > > > +	/* ..as long as it's not root.. */
> > > > +	if (*uid)
> > > > +		return 0;
> > > > +
> > > > +	/* ..or at least not root in the init namespace.. */
> > > > +	if ((fd = open("/proc/self/uid_map", O_RDONLY | O_CLOEXEC)) < 0)
> > > > +		return 0;
> > > > +
> > > > +	if (read(fd, buf, BUFSIZ) != sizeof(root_uid_map) ||
> > > > +	    strncmp(buf, root_uid_map, sizeof(root_uid_map) - 1)) {
> > > > +		close(fd);
> > > > +		return 0;
> > > > +	}
> > > > +
> > > > +	close(fd);
> > > > +
> > > > +	/* ..otherwise use nobody:nobody */  
> > > 
> > > I'd change all those comment to use ellipses (...) instead of "..".  
> > 
> > Ok, nit picked.
> > 
> > > I think your comments here help a lot, by the way (and I couldn't find
> > > a better way to check for UID 0 in non-init other than that hack).  
> > 
> > Right.  The extra complexity - both of code and of mental model - in
> > going from "not UID 0" to "not UID 0 in the init namespace" is what
> > makes me really wonder if this check is worth having.
> 
> I think it's desirable for two cases (rather important in my opinion):
> 
> - running passt with a further isolation implemented by a user
>   namespace (e.g. with pasta). There it's not really practical to use a
>   non-zero UID, and allowing to do this easily is a relevant security
>   feature
> 
> - ...if I recall correctly (but I can't check right now) Podman does
>   the same

Sorry, I realize I wasn't clear.  We absolutely need the ability to
run as "root" (UID 0) within a user namespace.  What I'm questioning
is given that whether it's worth preventing running when UID 0 outside
a user namespace (as far as we can tell).  There's arguably some edge
cases where it might be useful, and it's debatable whether it's
passt's job to prevent the user from shooting themselves in the foot
in this way.

-- 
David Gibson			| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you.  NOT _the_ _other_
				| _way_ _around_!
http://www.ozlabs.org/~dgibson

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH v2 03/10] Consolidate determination of UID/GID to run as
  2022-09-12  9:53         ` David Gibson
@ 2022-09-13  3:49           ` Stefano Brivio
  2022-09-13  5:20             ` David Gibson
  0 siblings, 1 reply; 33+ messages in thread
From: Stefano Brivio @ 2022-09-13  3:49 UTC (permalink / raw)
  To: passt-dev

[-- Attachment #1: Type: text/plain, Size: 5502 bytes --]

On Mon, 12 Sep 2022 19:53:38 +1000
David Gibson <david(a)gibson.dropbear.id.au> wrote:

> On Sat, Sep 10, 2022 at 10:43:31PM +0200, Stefano Brivio wrote:
> > On Sat, 10 Sep 2022 17:15:41 +1000
> > David Gibson <david(a)gibson.dropbear.id.au> wrote:
> >   
> > > On Fri, Sep 09, 2022 at 04:33:47PM +0200, Stefano Brivio wrote:  
> > > > Also mere nitpicking on this one:
> > > > 
> > > > On Thu,  8 Sep 2022 13:59:00 +1000
> > > > David Gibson <david(a)gibson.dropbear.id.au> wrote:
> > > >     
> > > > > Currently the logic to work out what UID and GID we will run as is spread
> > > > > across conf().  If --runas is specified it's handled in conf_runas(),
> > > > > otherwise it's handled by check_root(), which depends on initialization of
> > > > > the uid and gid variables by either conf() itself or conf_runas().
> > > > > 
> > > > > Make this clearer by putting all the UID and GID logic into a single
> > > > > conf_ugid() function.
> > > > > 
> > > > > Signed-off-by: David Gibson <david(a)gibson.dropbear.id.au>
> > > > > ---
> > > > >  conf.c | 81 ++++++++++++++++++++++++++++++++++++++++++++++++++++------
> > > > >  util.c | 50 ------------------------------------
> > > > >  util.h |  1 -
> > > > >  3 files changed, 73 insertions(+), 59 deletions(-)
> > > > > 
> > > > > diff --git a/conf.c b/conf.c
> > > > > index 545f61d..5c293b5 100644
> > > > > --- a/conf.c
> > > > > +++ b/conf.c
> > > > > @@ -1021,6 +1021,70 @@ static int conf_runas(const char *opt, unsigned int *uid, unsigned int *gid)
> > > > >  #endif /* !GLIBC_NO_STATIC_NSS */
> > > > >  }
> > > > >  
> > > > > +/**
> > > > > + * conf_ugid() - Determine UID and GID to run as
> > > > > + * @runas:	--runas option, may be NULL
> > > > > + * @uid:	User ID, set on success
> > > > > + * @gid:	Group ID, set on success
> > > > > + *
> > > > > + * Return: 0 on success, negative error code on failure
> > > > > + */
> > > > > +static int conf_ugid(const char *runas, uid_t *uid, gid_t *gid)
> > > > > +{
> > > > > +	const char root_uid_map[] = "         0          0 4294967295";
> > > > > +	struct passwd *pw;
> > > > > +	char buf[BUFSIZ];
> > > > > +	int ret;
> > > > > +	int fd;
> > > > > +
> > > > > +	/* If user has specified --runas, that takes precedence */
> > > > > +	if (runas) {
> > > > > +		ret = conf_runas(runas, uid, gid);
> > > > > +		if (ret)
> > > > > +			err("Invalid --runas option: %s", runas);
> > > > > +		return ret;
> > > > > +	}
> > > > > +
> > > > > +	/* Otherwise default to current user and group.. */
> > > > > +	*uid = geteuid();
> > > > > +	*gid = getegid();
> > > > > +
> > > > > +	/* ..as long as it's not root.. */
> > > > > +	if (*uid)
> > > > > +		return 0;
> > > > > +
> > > > > +	/* ..or at least not root in the init namespace.. */
> > > > > +	if ((fd = open("/proc/self/uid_map", O_RDONLY | O_CLOEXEC)) < 0)
> > > > > +		return 0;
> > > > > +
> > > > > +	if (read(fd, buf, BUFSIZ) != sizeof(root_uid_map) ||
> > > > > +	    strncmp(buf, root_uid_map, sizeof(root_uid_map) - 1)) {
> > > > > +		close(fd);
> > > > > +		return 0;
> > > > > +	}
> > > > > +
> > > > > +	close(fd);
> > > > > +
> > > > > +	/* ..otherwise use nobody:nobody */    
> > > > 
> > > > I'd change all those comment to use ellipses (...) instead of "..".    
> > > 
> > > Ok, nit picked.
> > >   
> > > > I think your comments here help a lot, by the way (and I couldn't find
> > > > a better way to check for UID 0 in non-init other than that hack).    
> > > 
> > > Right.  The extra complexity - both of code and of mental model - in
> > > going from "not UID 0" to "not UID 0 in the init namespace" is what
> > > makes me really wonder if this check is worth having.  
> > 
> > I think it's desirable for two cases (rather important in my opinion):
> > 
> > - running passt with a further isolation implemented by a user
> >   namespace (e.g. with pasta). There it's not really practical to use a
> >   non-zero UID, and allowing to do this easily is a relevant security
> >   feature
> > 
> > - ...if I recall correctly (but I can't check right now) Podman does
> >   the same  
> 
> Sorry, I realize I wasn't clear.  We absolutely need the ability to
> run as "root" (UID 0) within a user namespace.  What I'm questioning
> is given that whether it's worth preventing running when UID 0 outside
> a user namespace (as far as we can tell).  There's arguably some edge
> cases where it might be useful, and it's debatable whether it's
> passt's job to prevent the user from shooting themselves in the foot
> in this way.

Ah, I see now.

Well, I also think it's debatable, and I'd even tend to say it's not
actually passt's job, but I still think there are three reasons to keep
doing this:

- user mistakes happen, and it's also arguably our job to make usage
  less error-prone

- if users run this as root, we won't actually run as root, so we
  obtain an essentially equivalent level of security while letting
  lazy/distracted users do whatever... compared to the alternative, it
  sounds appealing

- given that it's debatable (for instance, many other tools and daemons
  do the same), I'd keep erring on the side of caution, as this might
  significantly decide the perceived, or factual, severity of any
  vulnerability that might be found

I also guess that forcing users to rebuild from source if they want to
do is reasonable for those edge cases.

-- 
Stefano


^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH v2 08/10] Correctly handle --netns-only in pasta_start_ns()
  2022-09-11  8:26       ` David Gibson
@ 2022-09-13  3:50         ` Stefano Brivio
  0 siblings, 0 replies; 33+ messages in thread
From: Stefano Brivio @ 2022-09-13  3:50 UTC (permalink / raw)
  To: passt-dev

[-- Attachment #1: Type: text/plain, Size: 2162 bytes --]

On Sun, 11 Sep 2022 18:26:31 +1000
David Gibson <david(a)gibson.dropbear.id.au> wrote:

> On Sat, Sep 10, 2022 at 05:25:36PM +1000, David Gibson wrote:
> > On Fri, Sep 09, 2022 at 04:34:20PM +0200, Stefano Brivio wrote:  
> > > On Thu,  8 Sep 2022 13:59:05 +1000
> > > David Gibson <david(a)gibson.dropbear.id.au> wrote:
> > >   
> > > > --netns-only is supposed to make pasta use only a network namespace, not
> > > > a user namespace.  However, pasta_start_ns() has this backwards, and if
> > > > --netns-only is specified it creates a user namespace but *not* a network
> > > > namespace.  Correct this.
> > > > 
> > > > Signed-off-by: David Gibson <david(a)gibson.dropbear.id.au>
> > > > ---
> > > >  pasta.c | 4 ++--
> > > >  1 file changed, 2 insertions(+), 2 deletions(-)
> > > > 
> > > > diff --git a/pasta.c b/pasta.c
> > > > index 0fd45e4..7eac8e9 100644
> > > > --- a/pasta.c
> > > > +++ b/pasta.c
> > > > @@ -244,8 +244,8 @@ void pasta_start_ns(struct ctx *c, int argc, char *argv[])
> > > >  
> > > >  	pasta_child_pid = clone(pasta_setup_ns,
> > > >  				ns_fn_stack + sizeof(ns_fn_stack) / 2,
> > > > -				(c->netns_only ? 0 : CLONE_NEWNET) |
> > > > -				CLONE_NEWIPC | CLONE_NEWPID | CLONE_NEWUSER |
> > > > +				(c->netns_only ? 0 : CLONE_NEWUSER) |
> > > > +				CLONE_NEWIPC | CLONE_NEWPID | CLONE_NEWNET |  
> > > 
> > > Oh, funny, so it never worked in this case.
> > > 
> > > I thought anyway your plan was to drop --netns-only altogether, and if
> > > a --netns option is specified without --userns, then it's implied. Is
> > > that still on the table (outside the scope of this series I presume)?  
> > 
> > Well.. having reduced the lifetome of netns_only to within conf()
> > alone, I felt a lot less urgency about removing it entirely.  It might
> > still be nicer to remove it anyway; I'll have another look.  
> 
> Actually, I realized there is a reason to leave it in: just using
> --netns doesn't work with spawning a command, and spawning a command
> with netns but no userns might be useful.

Right, thanks for noticing, that combination didn't occur to me at all.

-- 
Stefano


^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH v2 03/10] Consolidate determination of UID/GID to run as
  2022-09-13  3:49           ` Stefano Brivio
@ 2022-09-13  5:20             ` David Gibson
  0 siblings, 0 replies; 33+ messages in thread
From: David Gibson @ 2022-09-13  5:20 UTC (permalink / raw)
  To: passt-dev

[-- Attachment #1: Type: text/plain, Size: 6046 bytes --]

On Tue, Sep 13, 2022 at 05:49:52AM +0200, Stefano Brivio wrote:
> On Mon, 12 Sep 2022 19:53:38 +1000
> David Gibson <david(a)gibson.dropbear.id.au> wrote:
> 
> > On Sat, Sep 10, 2022 at 10:43:31PM +0200, Stefano Brivio wrote:
> > > On Sat, 10 Sep 2022 17:15:41 +1000
> > > David Gibson <david(a)gibson.dropbear.id.au> wrote:
> > >   
> > > > On Fri, Sep 09, 2022 at 04:33:47PM +0200, Stefano Brivio wrote:  
> > > > > Also mere nitpicking on this one:
> > > > > 
> > > > > On Thu,  8 Sep 2022 13:59:00 +1000
> > > > > David Gibson <david(a)gibson.dropbear.id.au> wrote:
> > > > >     
> > > > > > Currently the logic to work out what UID and GID we will run as is spread
> > > > > > across conf().  If --runas is specified it's handled in conf_runas(),
> > > > > > otherwise it's handled by check_root(), which depends on initialization of
> > > > > > the uid and gid variables by either conf() itself or conf_runas().
> > > > > > 
> > > > > > Make this clearer by putting all the UID and GID logic into a single
> > > > > > conf_ugid() function.
> > > > > > 
> > > > > > Signed-off-by: David Gibson <david(a)gibson.dropbear.id.au>
> > > > > > ---
> > > > > >  conf.c | 81 ++++++++++++++++++++++++++++++++++++++++++++++++++++------
> > > > > >  util.c | 50 ------------------------------------
> > > > > >  util.h |  1 -
> > > > > >  3 files changed, 73 insertions(+), 59 deletions(-)
> > > > > > 
> > > > > > diff --git a/conf.c b/conf.c
> > > > > > index 545f61d..5c293b5 100644
> > > > > > --- a/conf.c
> > > > > > +++ b/conf.c
> > > > > > @@ -1021,6 +1021,70 @@ static int conf_runas(const char *opt, unsigned int *uid, unsigned int *gid)
> > > > > >  #endif /* !GLIBC_NO_STATIC_NSS */
> > > > > >  }
> > > > > >  
> > > > > > +/**
> > > > > > + * conf_ugid() - Determine UID and GID to run as
> > > > > > + * @runas:	--runas option, may be NULL
> > > > > > + * @uid:	User ID, set on success
> > > > > > + * @gid:	Group ID, set on success
> > > > > > + *
> > > > > > + * Return: 0 on success, negative error code on failure
> > > > > > + */
> > > > > > +static int conf_ugid(const char *runas, uid_t *uid, gid_t *gid)
> > > > > > +{
> > > > > > +	const char root_uid_map[] = "         0          0 4294967295";
> > > > > > +	struct passwd *pw;
> > > > > > +	char buf[BUFSIZ];
> > > > > > +	int ret;
> > > > > > +	int fd;
> > > > > > +
> > > > > > +	/* If user has specified --runas, that takes precedence */
> > > > > > +	if (runas) {
> > > > > > +		ret = conf_runas(runas, uid, gid);
> > > > > > +		if (ret)
> > > > > > +			err("Invalid --runas option: %s", runas);
> > > > > > +		return ret;
> > > > > > +	}
> > > > > > +
> > > > > > +	/* Otherwise default to current user and group.. */
> > > > > > +	*uid = geteuid();
> > > > > > +	*gid = getegid();
> > > > > > +
> > > > > > +	/* ..as long as it's not root.. */
> > > > > > +	if (*uid)
> > > > > > +		return 0;
> > > > > > +
> > > > > > +	/* ..or at least not root in the init namespace.. */
> > > > > > +	if ((fd = open("/proc/self/uid_map", O_RDONLY | O_CLOEXEC)) < 0)
> > > > > > +		return 0;
> > > > > > +
> > > > > > +	if (read(fd, buf, BUFSIZ) != sizeof(root_uid_map) ||
> > > > > > +	    strncmp(buf, root_uid_map, sizeof(root_uid_map) - 1)) {
> > > > > > +		close(fd);
> > > > > > +		return 0;
> > > > > > +	}
> > > > > > +
> > > > > > +	close(fd);
> > > > > > +
> > > > > > +	/* ..otherwise use nobody:nobody */    
> > > > > 
> > > > > I'd change all those comment to use ellipses (...) instead of "..".    
> > > > 
> > > > Ok, nit picked.
> > > >   
> > > > > I think your comments here help a lot, by the way (and I couldn't find
> > > > > a better way to check for UID 0 in non-init other than that hack).    
> > > > 
> > > > Right.  The extra complexity - both of code and of mental model - in
> > > > going from "not UID 0" to "not UID 0 in the init namespace" is what
> > > > makes me really wonder if this check is worth having.  
> > > 
> > > I think it's desirable for two cases (rather important in my opinion):
> > > 
> > > - running passt with a further isolation implemented by a user
> > >   namespace (e.g. with pasta). There it's not really practical to use a
> > >   non-zero UID, and allowing to do this easily is a relevant security
> > >   feature
> > > 
> > > - ...if I recall correctly (but I can't check right now) Podman does
> > >   the same  
> > 
> > Sorry, I realize I wasn't clear.  We absolutely need the ability to
> > run as "root" (UID 0) within a user namespace.  What I'm questioning
> > is given that whether it's worth preventing running when UID 0 outside
> > a user namespace (as far as we can tell).  There's arguably some edge
> > cases where it might be useful, and it's debatable whether it's
> > passt's job to prevent the user from shooting themselves in the foot
> > in this way.
> 
> Ah, I see now.
> 
> Well, I also think it's debatable, and I'd even tend to say it's not
> actually passt's job, but I still think there are three reasons to keep
> doing this:
> 
> - user mistakes happen, and it's also arguably our job to make usage
>   less error-prone
> 
> - if users run this as root, we won't actually run as root, so we
>   obtain an essentially equivalent level of security while letting
>   lazy/distracted users do whatever... compared to the alternative, it
>   sounds appealing
> 
> - given that it's debatable (for instance, many other tools and daemons
>   do the same), I'd keep erring on the side of caution, as this might
>   significantly decide the perceived, or factual, severity of any
>   vulnerability that might be found
> 
> I also guess that forcing users to rebuild from source if they want to
> do is reasonable for those edge cases.

Yeah, I guess that's fair enough.

-- 
David Gibson			| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you.  NOT _the_ _other_
				| _way_ _around_!
http://www.ozlabs.org/~dgibson

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply	[flat|nested] 33+ messages in thread

end of thread, other threads:[~2022-09-13  5:20 UTC | newest]

Thread overview: 33+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-09-08  3:58 [PATCH v2 00/10] Clean up handling of userns David Gibson
2022-09-08  3:58 ` [PATCH v2 01/10] Don't store UID & GID persistently in the context structure David Gibson
2022-09-08  3:58 ` [PATCH v2 02/10] Split checking for root from dropping root privilege David Gibson
2022-09-09 14:33   ` Stefano Brivio
2022-09-10  7:09     ` David Gibson
2022-09-08  3:59 ` [PATCH v2 03/10] Consolidate determination of UID/GID to run as David Gibson
2022-09-09 14:33   ` Stefano Brivio
2022-09-10  7:15     ` David Gibson
2022-09-10 20:43       ` Stefano Brivio
2022-09-12  9:53         ` David Gibson
2022-09-13  3:49           ` Stefano Brivio
2022-09-13  5:20             ` David Gibson
2022-09-08  3:59 ` [PATCH v2 04/10] Safer handling if we can't open /proc/self/uid_map David Gibson
2022-09-09 14:33   ` Stefano Brivio
2022-09-10  7:23     ` David Gibson
2022-09-08  3:59 ` [PATCH v2 05/10] Move self-isolation code into a separate file David Gibson
2022-09-09 14:33   ` Stefano Brivio
2022-09-10  7:23     ` David Gibson
2022-09-10 20:43       ` Stefano Brivio
2022-09-08  3:59 ` [PATCH v2 06/10] Consolidate validation of pasta namespace options David Gibson
2022-09-08  3:59 ` [PATCH v2 07/10] Clean up and rename conf_ns_open() David Gibson
2022-09-08  3:59 ` [PATCH v2 08/10] Correctly handle --netns-only in pasta_start_ns() David Gibson
2022-09-09 14:34   ` Stefano Brivio
2022-09-10  7:25     ` David Gibson
2022-09-11  8:26       ` David Gibson
2022-09-13  3:50         ` Stefano Brivio
2022-09-08  3:59 ` [PATCH v2 09/10] Handle userns isolation and dropping root at the same time David Gibson
2022-09-08  3:59 ` [PATCH v2 10/10] Allow --userns when pasta spawns a command David Gibson
2022-09-09 14:34   ` Stefano Brivio
2022-09-10  7:29     ` David Gibson
2022-09-10 20:42       ` Stefano Brivio
2022-09-09 14:36 ` [PATCH v2 00/10] Clean up handling of userns Stefano Brivio
2022-09-10  7:30   ` David Gibson

Code repositories for project(s) associated with this public inbox

	https://passt.top/passt

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for IMAP folder(s).