From mboxrd@z Thu Jan 1 00:00:00 1970 From: David Gibson To: passt-dev@passt.top Subject: [PATCH 05/10] Move self-isolation code into a separate file Date: Wed, 07 Sep 2022 11:45:04 +1000 Message-ID: <20220907014509.3480812-6-david@gibson.dropbear.id.au> In-Reply-To: <20220907014509.3480812-1-david@gibson.dropbear.id.au> MIME-Version: 1.0 Content-Type: multipart/mixed; boundary="===============2754666717914623104==" --===============2754666717914623104== Content-Type: text/plain; charset="utf-8" Content-Transfer-Encoding: quoted-printable passt/pasta contains a number of routines designed to isolate passt from the rest of the system for security. These are spread through util.c and passt.c. Move them together into a new isolation.c file. Signed-off-by: David Gibson --- Makefile | 8 +-- conf.c | 1 + isolation.c | 164 ++++++++++++++++++++++++++++++++++++++++++++++++++++ isolation.h | 15 +++++ passt.c | 114 +----------------------------------- pasta.c | 1 + util.c | 50 ---------------- util.h | 2 - 8 files changed, 186 insertions(+), 169 deletions(-) create mode 100644 isolation.c create mode 100644 isolation.h diff --git a/Makefile b/Makefile index 26e64d5..f5ba62d 100644 --- a/Makefile +++ b/Makefile @@ -32,16 +32,16 @@ CFLAGS +=3D -DRLIMIT_STACK_VAL=3D$(RLIMIT_STACK_VAL) CFLAGS +=3D -DARCH=3D\"$(TARGET_ARCH)\" =20 PASST_SRCS =3D arch.c arp.c checksum.c conf.c dhcp.c dhcpv6.c icmp.c igmp.c \ - lineread.c mld.c ndp.c netlink.c packet.c passt.c pasta.c pcap.c \ - siphash.c tap.c tcp.c tcp_splice.c udp.c util.c + isolation.c lineread.c mld.c ndp.c netlink.c packet.c passt.c pasta.c \ + pcap.c siphash.c tap.c tcp.c tcp_splice.c udp.c util.c QRAP_SRCS =3D qrap.c SRCS =3D $(PASST_SRCS) $(QRAP_SRCS) =20 MANPAGES =3D passt.1 pasta.1 qrap.1 =20 PASST_HEADERS =3D arch.h arp.h checksum.h conf.h dhcp.h dhcpv6.h icmp.h \ - lineread.h ndp.h netlink.h packet.h passt.h pasta.h pcap.h \ - siphash.h tap.h tcp.h tcp_splice.h udp.h util.h + isolation.h lineread.h ndp.h netlink.h packet.h passt.h pasta.h \ + pcap.h siphash.h tap.h tcp.h tcp_splice.h udp.h util.h HEADERS =3D $(PASST_HEADERS) =20 # On gcc 11.2, with -O2 and -flto, tcp_hash() and siphash_20b(), if inlined, diff --git a/conf.c b/conf.c index f1aaa8a..08a2106 100644 --- a/conf.c +++ b/conf.c @@ -40,6 +40,7 @@ #include "tcp.h" #include "pasta.h" #include "lineread.h" +#include "isolation.h" =20 /** * get_bound_ports() - Get maps of ports with bound sockets diff --git a/isolation.c b/isolation.c new file mode 100644 index 0000000..06cc5c3 --- /dev/null +++ b/isolation.c @@ -0,0 +1,164 @@ +// SPDX-License-Identifier: AGPL-3.0-or-later + +/* PASST - Plug A Simple Socket Transport + * for qemu/UNIX domain socket mode + * + * PASTA - Pack A Subtle Tap Abstraction + * for network namespace/tap device mode + * + * isolation.c - Self isolation helpers + * + * Copyright Red Hat + * Author: Stefano Brivio + * Author: David Gibson + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include "util.h" +#include "seccomp.h" +#include "passt.h" +#include "isolation.h" + +/** + * drop_caps() - Drop capabilities we might have except for CAP_NET_BIND_SER= VICE + */ +void drop_caps(void) +{ + int i; + + for (i =3D 0; i < 64; i++) { + if (i =3D=3D CAP_NET_BIND_SERVICE) + continue; + + prctl(PR_CAPBSET_DROP, i, 0, 0, 0); + } +} + +/** + * drop_root() - Switch to given UID and GID + */ +void drop_root(uid_t uid, gid_t gid) +{ + if (setgroups(0, NULL)) { + /* If we don't start with CAP_SETGID, this will EPERM */ + if (errno !=3D EPERM) { + err("Can't drop supplementary groups: %s", + strerror(errno)); + exit(EXIT_FAILURE); + } + } + + if (!setgid(gid) && !setuid(uid)) + return; + + err("Can't change user/group, exiting"); + exit(EXIT_FAILURE); +} + +/** + * sandbox() - Unshare IPC, mount, PID, UTS, and user namespaces, "unmount" = root + * + * Return: negative error code on failure, zero on success + */ +int sandbox(struct ctx *c) +{ + int flags =3D CLONE_NEWIPC | CLONE_NEWNS | CLONE_NEWUTS; + + if (!c->netns_only) { + if (c->pasta_userns_fd =3D=3D -1) + flags |=3D CLONE_NEWUSER; + else + setns(c->pasta_userns_fd, CLONE_NEWUSER); + } + + c->pasta_userns_fd =3D -1; + + /* If we run in foreground, we have no chance to actually move to a new + * PID namespace. For passt, use CLONE_NEWPID anyway, in case somebody + * ever gets around seccomp profiles -- there's no harm in passing it. + */ + if (!c->foreground || c->mode =3D=3D MODE_PASST) + flags |=3D CLONE_NEWPID; + + if (unshare(flags)) { + perror("unshare"); + return -errno; + } + + if (mount("", "/", "", MS_UNBINDABLE | MS_REC, NULL)) { + perror("mount /"); + return -errno; + } + + if (mount("", TMPDIR, "tmpfs", + MS_NODEV | MS_NOEXEC | MS_NOSUID | MS_RDONLY, + "nr_inodes=3D2,nr_blocks=3D0")) { + perror("mount tmpfs"); + return -errno; + } + + if (chdir(TMPDIR)) { + perror("chdir"); + return -errno; + } + + if (syscall(SYS_pivot_root, ".", ".")) { + perror("pivot_root"); + return -errno; + } + + if (umount2(".", MNT_DETACH | UMOUNT_NOFOLLOW)) { + perror("umount2"); + return -errno; + } + + drop_caps(); /* Relative to the new user namespace this time. */ + + return 0; +} + +/** + * seccomp() - Set up seccomp filters depending on mode, won't return on fai= lure + * @c: Execution context + */ +void seccomp(const struct ctx *c) +{ + struct sock_fprog prog; + + if (c->mode =3D=3D MODE_PASST) { + prog.len =3D (unsigned short)ARRAY_SIZE(filter_passt); + prog.filter =3D filter_passt; + } else { + prog.len =3D (unsigned short)ARRAY_SIZE(filter_pasta); + prog.filter =3D filter_pasta; + } + + if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) || + prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog)) { + perror("prctl"); + exit(EXIT_FAILURE); + } +} diff --git a/isolation.h b/isolation.h new file mode 100644 index 0000000..2540a35 --- /dev/null +++ b/isolation.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: AGPL-3.0-or-later + * Copyright Red Hat + * Author: Stefano Brivio + * Author: David Gibson + */ + +#ifndef ISOLATION_H +#define ISOLATION_H + +void drop_caps(void); +void drop_root(uid_t uid, gid_t gid); +int sandbox(struct ctx *c); +void seccomp(const struct ctx *c); + +#endif /* ISOLATION_H */ diff --git a/passt.c b/passt.c index bbf53d9..915d7e6 100644 --- a/passt.c +++ b/passt.c @@ -19,51 +19,24 @@ * created in a separate network namespace). */ =20 -#include -#include #include -#include -#include -#include -#include #include #include #include -#include -#include -#include -#include -#include -#include -#include #include -#include -#include #include #include #include #include #include -#include #include -#include -#include -#include #include =20 -#include -#include -#include -#include - #include "util.h" -#include "seccomp.h" #include "passt.h" #include "dhcp.h" #include "dhcpv6.h" -#include "icmp.h" -#include "tcp.h" -#include "udp.h" +#include "isolation.h" #include "pcap.h" #include "tap.h" #include "conf.h" @@ -166,91 +139,6 @@ void proto_update_l2_buf(const unsigned char *eth_d, con= st unsigned char *eth_s, udp_update_l2_buf(eth_d, eth_s, ip_da); } =20 -/** - * seccomp() - Set up seccomp filters depending on mode, won't return on fai= lure - * @c: Execution context - */ -static void seccomp(const struct ctx *c) -{ - struct sock_fprog prog; - - if (c->mode =3D=3D MODE_PASST) { - prog.len =3D (unsigned short)ARRAY_SIZE(filter_passt); - prog.filter =3D filter_passt; - } else { - prog.len =3D (unsigned short)ARRAY_SIZE(filter_pasta); - prog.filter =3D filter_pasta; - } - - if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) || - prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog)) { - perror("prctl"); - exit(EXIT_FAILURE); - } -} - -/** - * sandbox() - Unshare IPC, mount, PID, UTS, and user namespaces, "unmount" = root - * - * Return: negative error code on failure, zero on success - */ -static int sandbox(struct ctx *c) -{ - int flags =3D CLONE_NEWIPC | CLONE_NEWNS | CLONE_NEWUTS; - - if (!c->netns_only) { - if (c->pasta_userns_fd =3D=3D -1) - flags |=3D CLONE_NEWUSER; - else - setns(c->pasta_userns_fd, CLONE_NEWUSER); - } - - c->pasta_userns_fd =3D -1; - - /* If we run in foreground, we have no chance to actually move to a new - * PID namespace. For passt, use CLONE_NEWPID anyway, in case somebody - * ever gets around seccomp profiles -- there's no harm in passing it. - */ - if (!c->foreground || c->mode =3D=3D MODE_PASST) - flags |=3D CLONE_NEWPID; - - if (unshare(flags)) { - perror("unshare"); - return -errno; - } - - if (mount("", "/", "", MS_UNBINDABLE | MS_REC, NULL)) { - perror("mount /"); - return -errno; - } - - if (mount("", TMPDIR, "tmpfs", - MS_NODEV | MS_NOEXEC | MS_NOSUID | MS_RDONLY, - "nr_inodes=3D2,nr_blocks=3D0")) { - perror("mount tmpfs"); - return -errno; - } - - if (chdir(TMPDIR)) { - perror("chdir"); - return -errno; - } - - if (syscall(SYS_pivot_root, ".", ".")) { - perror("pivot_root"); - return -errno; - } - - if (umount2(".", MNT_DETACH | UMOUNT_NOFOLLOW)) { - perror("umount2"); - return -errno; - } - - drop_caps(); /* Relative to the new user namespace this time. */ - - return 0; -} - /** * exit_handler() - Signal handler for SIGQUIT and SIGTERM * @unused: Unused, handler deals with SIGQUIT and SIGTERM only diff --git a/pasta.c b/pasta.c index a844af2..0bdb655 100644 --- a/pasta.c +++ b/pasta.c @@ -40,6 +40,7 @@ =20 #include "util.h" #include "passt.h" +#include "isolation.h" #include "netlink.h" =20 /* PID of child, in case we created a namespace */ diff --git a/util.c b/util.c index 654410f..bf3011f 100644 --- a/util.c +++ b/util.c @@ -13,30 +13,16 @@ */ =20 #include -#include -#include -#include #include -#include #include #include -#include -#include -#include #include -#include -#include -#include #include #include #include #include #include #include -#include -#include - -#include =20 #include "util.h" #include "passt.h" @@ -467,42 +453,6 @@ void procfs_scan_listen(struct ctx *c, uint8_t proto, in= t ip_version, int ns, } } =20 -/** - * drop_caps() - Drop capabilities we might have except for CAP_NET_BIND_SER= VICE - */ -void drop_caps(void) -{ - int i; - - for (i =3D 0; i < 64; i++) { - if (i =3D=3D CAP_NET_BIND_SERVICE) - continue; - - prctl(PR_CAPBSET_DROP, i, 0, 0, 0); - } -} - -/** - * drop_root() - Switch to given UID and GID - */ -void drop_root(uid_t uid, gid_t gid) -{ - if (setgroups(0, NULL)) { - /* If we don't start with CAP_SETGID, this will EPERM */ - if (errno !=3D EPERM) { - err("Can't drop supplementary groups: %s", - strerror(errno)); - exit(EXIT_FAILURE); - } - } - - if (!setgid(gid) && !setuid(uid)) - return; - - err("Can't change user/group, exiting"); - exit(EXIT_FAILURE); -} - /** * ns_enter() - Enter configured user (unless already joined) and network ns * @c: Execution context diff --git a/util.h b/util.h index 9626cb5..1003303 100644 --- a/util.h +++ b/util.h @@ -233,8 +233,6 @@ int bitmap_isset(const uint8_t *map, int bit); char *line_read(char *buf, size_t len, int fd); void procfs_scan_listen(struct ctx *c, uint8_t proto, int ip_version, int ns, uint8_t *map, uint8_t *exclude); -void drop_caps(void); -void drop_root(uid_t uid, gid_t gid); int ns_enter(const struct ctx *c); void write_pidfile(int fd, pid_t pid); int __daemon(int pidfile_fd, int devnull_fd); --=20 2.37.3 --===============2754666717914623104==--