// SPDX-License-Identifier: AGPL-3.0-or-later /* PASST - Plug A Simple Socket Transport * for qemu/UNIX domain socket mode * * PASTA - Pack A Subtle Tap Abstraction * for network namespace/tap device mode * * isolation.c - Self isolation helpers * * Copyright Red Hat * Author: Stefano Brivio * Author: David Gibson */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "util.h" #include "seccomp.h" #include "passt.h" #include "isolation.h" /** * drop_caps() - Drop capabilities we might have except for CAP_NET_BIND_SERVICE */ void drop_caps(void) { int i; for (i = 0; i < 64; i++) { if (i == CAP_NET_BIND_SERVICE) continue; prctl(PR_CAPBSET_DROP, i, 0, 0, 0); } } /** * drop_root() - Switch to given UID and GID */ void drop_root(uid_t uid, gid_t gid) { if (setgroups(0, NULL)) { /* If we don't start with CAP_SETGID, this will EPERM */ if (errno != EPERM) { err("Can't drop supplementary groups: %s", strerror(errno)); exit(EXIT_FAILURE); } } if (!setgid(gid) && !setuid(uid)) return; err("Can't change user/group, exiting"); exit(EXIT_FAILURE); } /** * sandbox() - Unshare IPC, mount, PID, UTS, and user namespaces, "unmount" root * * Return: negative error code on failure, zero on success */ int sandbox(struct ctx *c) { int flags = CLONE_NEWIPC | CLONE_NEWNS | CLONE_NEWUTS; if (!c->netns_only) { if (c->pasta_userns_fd == -1) flags |= CLONE_NEWUSER; else setns(c->pasta_userns_fd, CLONE_NEWUSER); } c->pasta_userns_fd = -1; /* If we run in foreground, we have no chance to actually move to a new * PID namespace. For passt, use CLONE_NEWPID anyway, in case somebody * ever gets around seccomp profiles -- there's no harm in passing it. */ if (!c->foreground || c->mode == MODE_PASST) flags |= CLONE_NEWPID; if (unshare(flags)) { perror("unshare"); return -errno; } if (mount("", "/", "", MS_UNBINDABLE | MS_REC, NULL)) { perror("mount /"); return -errno; } if (mount("", TMPDIR, "tmpfs", MS_NODEV | MS_NOEXEC | MS_NOSUID | MS_RDONLY, "nr_inodes=2,nr_blocks=0")) { perror("mount tmpfs"); return -errno; } if (chdir(TMPDIR)) { perror("chdir"); return -errno; } if (syscall(SYS_pivot_root, ".", ".")) { perror("pivot_root"); return -errno; } if (umount2(".", MNT_DETACH | UMOUNT_NOFOLLOW)) { perror("umount2"); return -errno; } drop_caps(); /* Relative to the new user namespace this time. */ return 0; } /** * seccomp() - Set up seccomp filters depending on mode, won't return on failure * @c: Execution context */ void seccomp(const struct ctx *c) { struct sock_fprog prog; if (c->mode == MODE_PASST) { prog.len = (unsigned short)ARRAY_SIZE(filter_passt); prog.filter = filter_passt; } else { prog.len = (unsigned short)ARRAY_SIZE(filter_pasta); prog.filter = filter_pasta; } if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) || prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog)) { perror("prctl"); exit(EXIT_FAILURE); } }