public inbox for passt-dev@passt.top
 help / color / mirror / code / Atom feed
From: David Gibson <david@gibson.dropbear.id.au>
To: passt-dev@passt.top
Subject: [PATCH v2 05/10] Move self-isolation code into a separate file
Date: Thu, 08 Sep 2022 13:59:02 +1000	[thread overview]
Message-ID: <20220908035907.1750314-6-david@gibson.dropbear.id.au> (raw)
In-Reply-To: <20220908035907.1750314-1-david@gibson.dropbear.id.au>

[-- Attachment #1: Type: text/plain, Size: 12646 bytes --]

passt/pasta contains a number of routines designed to isolate passt from
the rest of the system for security.  These are spread through util.c and
passt.c.  Move them together into a new isolation.c file.

Signed-off-by: David Gibson <david(a)gibson.dropbear.id.au>
---
 Makefile    |   8 +--
 conf.c      |   1 +
 isolation.c | 165 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 isolation.h |  15 +++++
 passt.c     | 113 +----------------------------------
 pasta.c     |   1 +
 util.c      |  49 ----------------
 util.h      |   2 -
 8 files changed, 187 insertions(+), 167 deletions(-)
 create mode 100644 isolation.c
 create mode 100644 isolation.h

diff --git a/Makefile b/Makefile
index 644a541..af3d1ff 100644
--- a/Makefile
+++ b/Makefile
@@ -32,16 +32,16 @@ CFLAGS += -DRLIMIT_STACK_VAL=$(RLIMIT_STACK_VAL)
 CFLAGS += -DARCH=\"$(TARGET_ARCH)\"
 
 PASST_SRCS = arch.c arp.c checksum.c conf.c dhcp.c dhcpv6.c icmp.c igmp.c \
-	lineread.c mld.c ndp.c netlink.c packet.c passt.c pasta.c pcap.c \
-	siphash.c tap.c tcp.c tcp_splice.c udp.c util.c
+	isolation.c lineread.c mld.c ndp.c netlink.c packet.c passt.c pasta.c \
+	pcap.c siphash.c tap.c tcp.c tcp_splice.c udp.c util.c
 QRAP_SRCS = qrap.c
 SRCS = $(PASST_SRCS) $(QRAP_SRCS)
 
 MANPAGES = passt.1 pasta.1 qrap.1
 
 PASST_HEADERS = arch.h arp.h checksum.h conf.h dhcp.h dhcpv6.h icmp.h \
-	lineread.h ndp.h netlink.h packet.h passt.h pasta.h pcap.h \
-	siphash.h tap.h tcp.h tcp_splice.h udp.h util.h
+	isolation.h lineread.h ndp.h netlink.h packet.h passt.h pasta.h \
+	pcap.h siphash.h tap.h tcp.h tcp_splice.h udp.h util.h
 HEADERS = $(PASST_HEADERS)
 
 # On gcc 11.2, with -O2 and -flto, tcp_hash() and siphash_20b(), if inlined,
diff --git a/conf.c b/conf.c
index f1aaa8a..08a2106 100644
--- a/conf.c
+++ b/conf.c
@@ -40,6 +40,7 @@
 #include "tcp.h"
 #include "pasta.h"
 #include "lineread.h"
+#include "isolation.h"
 
 /**
  * get_bound_ports() - Get maps of ports with bound sockets
diff --git a/isolation.c b/isolation.c
new file mode 100644
index 0000000..bc8240f
--- /dev/null
+++ b/isolation.c
@@ -0,0 +1,165 @@
+// SPDX-License-Identifier: AGPL-3.0-or-later
+
+/* PASST - Plug A Simple Socket Transport
+ *  for qemu/UNIX domain socket mode
+ *
+ * PASTA - Pack A Subtle Tap Abstraction
+ *  for network namespace/tap device mode
+ *
+ * isolation.c - Self isolation helpers
+ *
+ * Copyright Red Hat
+ * Author: Stefano Brivio <sbrivio(a)redhat.com>
+ * Author: David Gibson <david(a)gibson.dropbear.id.au>
+ */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <grp.h>
+#include <inttypes.h>
+#include <limits.h>
+#include <pwd.h>
+#include <sched.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+#include <unistd.h>
+#include <sys/mount.h>
+#include <sys/prctl.h>
+#include <sys/socket.h>
+#include <sys/syscall.h>
+#include <sys/types.h>
+#include <netinet/in.h>
+#include <netinet/if_ether.h>
+
+#include <linux/audit.h>
+#include <linux/capability.h>
+#include <linux/filter.h>
+#include <linux/seccomp.h>
+
+#include "util.h"
+#include "seccomp.h"
+#include "passt.h"
+#include "isolation.h"
+
+/**
+ * drop_caps() - Drop capabilities we might have except for CAP_NET_BIND_SERVICE
+ */
+void drop_caps(void)
+{
+	int i;
+
+	for (i = 0; i < 64; i++) {
+		if (i == CAP_NET_BIND_SERVICE)
+			continue;
+
+		prctl(PR_CAPBSET_DROP, i, 0, 0, 0);
+	}
+}
+
+/**
+ * drop_root() - Switch to given UID and GID
+ */
+void drop_root(uid_t uid, gid_t gid)
+{
+	if (setgroups(0, NULL)) {
+		/* If we don't start with CAP_SETGID, this will EPERM */
+		if (errno != EPERM) {
+			err("Can't drop supplementary groups: %s",
+			    strerror(errno));
+			exit(EXIT_FAILURE);
+		}
+	}
+
+	if (!setgid(gid) && !setuid(uid))
+		return;
+
+	err("Can't change user/group, exiting");
+	exit(EXIT_FAILURE);
+}
+
+/**
+ * sandbox() - Unshare IPC, mount, PID, UTS, and user namespaces, "unmount" root
+ *
+ * Return: negative error code on failure, zero on success
+ */
+int sandbox(struct ctx *c)
+{
+	int flags = CLONE_NEWIPC | CLONE_NEWNS | CLONE_NEWUTS;
+
+	if (!c->netns_only) {
+		if (c->pasta_userns_fd == -1)
+			flags |= CLONE_NEWUSER;
+		else
+			setns(c->pasta_userns_fd, CLONE_NEWUSER);
+	}
+
+	c->pasta_userns_fd = -1;
+
+	/* If we run in foreground, we have no chance to actually move to a new
+	 * PID namespace. For passt, use CLONE_NEWPID anyway, in case somebody
+	 * ever gets around seccomp profiles -- there's no harm in passing it.
+	 */
+	if (!c->foreground || c->mode == MODE_PASST)
+		flags |= CLONE_NEWPID;
+
+	if (unshare(flags)) {
+		perror("unshare");
+		return -errno;
+	}
+
+	if (mount("", "/", "", MS_UNBINDABLE | MS_REC, NULL)) {
+		perror("mount /");
+		return -errno;
+	}
+
+	if (mount("", TMPDIR, "tmpfs",
+		  MS_NODEV | MS_NOEXEC | MS_NOSUID | MS_RDONLY,
+		  "nr_inodes=2,nr_blocks=0")) {
+		perror("mount tmpfs");
+		return -errno;
+	}
+
+	if (chdir(TMPDIR)) {
+		perror("chdir");
+		return -errno;
+	}
+
+	if (syscall(SYS_pivot_root, ".", ".")) {
+		perror("pivot_root");
+		return -errno;
+	}
+
+	if (umount2(".", MNT_DETACH | UMOUNT_NOFOLLOW)) {
+		perror("umount2");
+		return -errno;
+	}
+
+	drop_caps();	/* Relative to the new user namespace this time. */
+
+	return 0;
+}
+
+/**
+ * seccomp() - Set up seccomp filters depending on mode, won't return on failure
+ * @c:		Execution context
+ */
+void seccomp(const struct ctx *c)
+{
+	struct sock_fprog prog;
+
+	if (c->mode == MODE_PASST) {
+		prog.len = (unsigned short)ARRAY_SIZE(filter_passt);
+		prog.filter = filter_passt;
+	} else {
+		prog.len = (unsigned short)ARRAY_SIZE(filter_pasta);
+		prog.filter = filter_pasta;
+	}
+
+	if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) ||
+	    prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog)) {
+		perror("prctl");
+		exit(EXIT_FAILURE);
+	}
+}
diff --git a/isolation.h b/isolation.h
new file mode 100644
index 0000000..2540a35
--- /dev/null
+++ b/isolation.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: AGPL-3.0-or-later
+ * Copyright Red Hat
+ * Author: Stefano Brivio <sbrivio(a)redhat.com>
+ * Author: David Gibson <david(a)gibson.dropbear.id.au>
+ */
+
+#ifndef ISOLATION_H
+#define ISOLATION_H
+
+void drop_caps(void);
+void drop_root(uid_t uid, gid_t gid);
+int sandbox(struct ctx *c);
+void seccomp(const struct ctx *c);
+
+#endif /* ISOLATION_H */
diff --git a/passt.c b/passt.c
index bbf53d9..2a8314c 100644
--- a/passt.c
+++ b/passt.c
@@ -19,51 +19,25 @@
  * created in a separate network namespace).
  */
 
-#include <sched.h>
-#include <stdio.h>
 #include <sys/epoll.h>
-#include <sys/socket.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <dirent.h>
 #include <fcntl.h>
 #include <sys/mman.h>
 #include <sys/resource.h>
-#include <sys/uio.h>
-#include <sys/syscall.h>
-#include <sys/wait.h>
-#include <sys/mount.h>
-#include <netinet/ip.h>
-#include <net/ethernet.h>
-#include <libgen.h>
 #include <stdlib.h>
 #include <unistd.h>
-#include <net/if.h>
 #include <netdb.h>
 #include <string.h>
 #include <errno.h>
 #include <time.h>
 #include <syslog.h>
-#include <sys/stat.h>
 #include <sys/prctl.h>
-#include <stddef.h>
-#include <netinet/udp.h>
-#include <netinet/tcp.h>
 #include <netinet/if_ether.h>
 
-#include <linux/seccomp.h>
-#include <linux/audit.h>
-#include <linux/filter.h>
-#include <linux/icmpv6.h>
-
 #include "util.h"
-#include "seccomp.h"
 #include "passt.h"
 #include "dhcp.h"
 #include "dhcpv6.h"
-#include "icmp.h"
-#include "tcp.h"
-#include "udp.h"
+#include "isolation.h"
 #include "pcap.h"
 #include "tap.h"
 #include "conf.h"
@@ -166,91 +140,6 @@ void proto_update_l2_buf(const unsigned char *eth_d, const unsigned char *eth_s,
 	udp_update_l2_buf(eth_d, eth_s, ip_da);
 }
 
-/**
- * seccomp() - Set up seccomp filters depending on mode, won't return on failure
- * @c:		Execution context
- */
-static void seccomp(const struct ctx *c)
-{
-	struct sock_fprog prog;
-
-	if (c->mode == MODE_PASST) {
-		prog.len = (unsigned short)ARRAY_SIZE(filter_passt);
-		prog.filter = filter_passt;
-	} else {
-		prog.len = (unsigned short)ARRAY_SIZE(filter_pasta);
-		prog.filter = filter_pasta;
-	}
-
-	if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) ||
-	    prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog)) {
-		perror("prctl");
-		exit(EXIT_FAILURE);
-	}
-}
-
-/**
- * sandbox() - Unshare IPC, mount, PID, UTS, and user namespaces, "unmount" root
- *
- * Return: negative error code on failure, zero on success
- */
-static int sandbox(struct ctx *c)
-{
-	int flags = CLONE_NEWIPC | CLONE_NEWNS | CLONE_NEWUTS;
-
-	if (!c->netns_only) {
-		if (c->pasta_userns_fd == -1)
-			flags |= CLONE_NEWUSER;
-		else
-			setns(c->pasta_userns_fd, CLONE_NEWUSER);
-	}
-
-	c->pasta_userns_fd = -1;
-
-	/* If we run in foreground, we have no chance to actually move to a new
-	 * PID namespace. For passt, use CLONE_NEWPID anyway, in case somebody
-	 * ever gets around seccomp profiles -- there's no harm in passing it.
-	 */
-	if (!c->foreground || c->mode == MODE_PASST)
-		flags |= CLONE_NEWPID;
-
-	if (unshare(flags)) {
-		perror("unshare");
-		return -errno;
-	}
-
-	if (mount("", "/", "", MS_UNBINDABLE | MS_REC, NULL)) {
-		perror("mount /");
-		return -errno;
-	}
-
-	if (mount("", TMPDIR, "tmpfs",
-		  MS_NODEV | MS_NOEXEC | MS_NOSUID | MS_RDONLY,
-		  "nr_inodes=2,nr_blocks=0")) {
-		perror("mount tmpfs");
-		return -errno;
-	}
-
-	if (chdir(TMPDIR)) {
-		perror("chdir");
-		return -errno;
-	}
-
-	if (syscall(SYS_pivot_root, ".", ".")) {
-		perror("pivot_root");
-		return -errno;
-	}
-
-	if (umount2(".", MNT_DETACH | UMOUNT_NOFOLLOW)) {
-		perror("umount2");
-		return -errno;
-	}
-
-	drop_caps();	/* Relative to the new user namespace this time. */
-
-	return 0;
-}
-
 /**
  * exit_handler() - Signal handler for SIGQUIT and SIGTERM
  * @unused:	Unused, handler deals with SIGQUIT and SIGTERM only
diff --git a/pasta.c b/pasta.c
index a844af2..0bdb655 100644
--- a/pasta.c
+++ b/pasta.c
@@ -40,6 +40,7 @@
 
 #include "util.h"
 #include "passt.h"
+#include "isolation.h"
 #include "netlink.h"
 
 /* PID of child, in case we created a namespace */
diff --git a/util.c b/util.c
index 654410f..f709838 100644
--- a/util.c
+++ b/util.c
@@ -13,30 +13,17 @@
  */
 
 #include <sched.h>
-#include <stdio.h>
-#include <stdint.h>
-#include <stddef.h>
 #include <stdlib.h>
 #include <unistd.h>
 #include <arpa/inet.h>
 #include <net/ethernet.h>
-#include <net/if.h>
-#include <netinet/tcp.h>
-#include <netinet/udp.h>
 #include <sys/epoll.h>
-#include <sys/prctl.h>
-#include <sys/types.h>
-#include <sys/stat.h>
 #include <fcntl.h>
 #include <syslog.h>
 #include <stdarg.h>
 #include <string.h>
 #include <time.h>
 #include <errno.h>
-#include <pwd.h>
-#include <grp.h>
-
-#include <linux/capability.h>
 
 #include "util.h"
 #include "passt.h"
@@ -467,42 +454,6 @@ void procfs_scan_listen(struct ctx *c, uint8_t proto, int ip_version, int ns,
 	}
 }
 
-/**
- * drop_caps() - Drop capabilities we might have except for CAP_NET_BIND_SERVICE
- */
-void drop_caps(void)
-{
-	int i;
-
-	for (i = 0; i < 64; i++) {
-		if (i == CAP_NET_BIND_SERVICE)
-			continue;
-
-		prctl(PR_CAPBSET_DROP, i, 0, 0, 0);
-	}
-}
-
-/**
- * drop_root() - Switch to given UID and GID
- */
-void drop_root(uid_t uid, gid_t gid)
-{
-	if (setgroups(0, NULL)) {
-		/* If we don't start with CAP_SETGID, this will EPERM */
-		if (errno != EPERM) {
-			err("Can't drop supplementary groups: %s",
-			    strerror(errno));
-			exit(EXIT_FAILURE);
-		}
-	}
-
-	if (!setgid(gid) && !setuid(uid))
-		return;
-
-	err("Can't change user/group, exiting");
-	exit(EXIT_FAILURE);
-}
-
 /**
  * ns_enter() - Enter configured user (unless already joined) and network ns
  * @c:		Execution context
diff --git a/util.h b/util.h
index 9626cb5..1003303 100644
--- a/util.h
+++ b/util.h
@@ -233,8 +233,6 @@ int bitmap_isset(const uint8_t *map, int bit);
 char *line_read(char *buf, size_t len, int fd);
 void procfs_scan_listen(struct ctx *c, uint8_t proto, int ip_version, int ns,
 			uint8_t *map, uint8_t *exclude);
-void drop_caps(void);
-void drop_root(uid_t uid, gid_t gid);
 int ns_enter(const struct ctx *c);
 void write_pidfile(int fd, pid_t pid);
 int __daemon(int pidfile_fd, int devnull_fd);
-- 
2.37.3


  parent reply	other threads:[~2022-09-08  3:59 UTC|newest]

Thread overview: 33+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-09-08  3:58 [PATCH v2 00/10] Clean up handling of userns David Gibson
2022-09-08  3:58 ` [PATCH v2 01/10] Don't store UID & GID persistently in the context structure David Gibson
2022-09-08  3:58 ` [PATCH v2 02/10] Split checking for root from dropping root privilege David Gibson
2022-09-09 14:33   ` Stefano Brivio
2022-09-10  7:09     ` David Gibson
2022-09-08  3:59 ` [PATCH v2 03/10] Consolidate determination of UID/GID to run as David Gibson
2022-09-09 14:33   ` Stefano Brivio
2022-09-10  7:15     ` David Gibson
2022-09-10 20:43       ` Stefano Brivio
2022-09-12  9:53         ` David Gibson
2022-09-13  3:49           ` Stefano Brivio
2022-09-13  5:20             ` David Gibson
2022-09-08  3:59 ` [PATCH v2 04/10] Safer handling if we can't open /proc/self/uid_map David Gibson
2022-09-09 14:33   ` Stefano Brivio
2022-09-10  7:23     ` David Gibson
2022-09-08  3:59 ` David Gibson [this message]
2022-09-09 14:33   ` [PATCH v2 05/10] Move self-isolation code into a separate file Stefano Brivio
2022-09-10  7:23     ` David Gibson
2022-09-10 20:43       ` Stefano Brivio
2022-09-08  3:59 ` [PATCH v2 06/10] Consolidate validation of pasta namespace options David Gibson
2022-09-08  3:59 ` [PATCH v2 07/10] Clean up and rename conf_ns_open() David Gibson
2022-09-08  3:59 ` [PATCH v2 08/10] Correctly handle --netns-only in pasta_start_ns() David Gibson
2022-09-09 14:34   ` Stefano Brivio
2022-09-10  7:25     ` David Gibson
2022-09-11  8:26       ` David Gibson
2022-09-13  3:50         ` Stefano Brivio
2022-09-08  3:59 ` [PATCH v2 09/10] Handle userns isolation and dropping root at the same time David Gibson
2022-09-08  3:59 ` [PATCH v2 10/10] Allow --userns when pasta spawns a command David Gibson
2022-09-09 14:34   ` Stefano Brivio
2022-09-10  7:29     ` David Gibson
2022-09-10 20:42       ` Stefano Brivio
2022-09-09 14:36 ` [PATCH v2 00/10] Clean up handling of userns Stefano Brivio
2022-09-10  7:30   ` David Gibson

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20220908035907.1750314-6-david@gibson.dropbear.id.au \
    --to=david@gibson.dropbear.id.au \
    --cc=passt-dev@passt.top \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

  Be sure your reply has a Subject: header at the top and a blank line
  before the message body.

Code repositories for project(s) associated with this public inbox

	https://passt.top/passt

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for IMAP folder(s).