* [PATCH v3] passt, util: Close any open file that the parent might have leaked
@ 2024-08-07 11:11 Stefano Brivio
2024-08-07 11:26 ` Paul Holzinger
0 siblings, 1 reply; 3+ messages in thread
From: Stefano Brivio @ 2024-08-07 11:11 UTC (permalink / raw)
To: passt-dev; +Cc: Paul Holzinger, David Gibson
If a parent accidentally or due to implementation reasons leaks any
open file, we don't want to have access to them, except for the file
passed via --fd, if any.
This is the case for Podman when Podman's parent leaks files into
Podman: it's not practical for Podman to close unrelated files before
starting pasta, as reported by Paul.
Use close_range(2) to close all open files except for standard streams
and the one from --fd.
Given that parts of conf() depend on other files to be already opened,
such as the epoll file descriptor, we can't easily defer this to a
more convenient point, where --fd was already parsed. Introduce a
minimal, duplicate version of --fd parsing to keep this simple.
As we need to check that the passed --fd option doesn't exceed
INT_MAX, because we'll parse it with strtol() but file descriptor
indices are signed ints (regardless of the arguments close_range()
take), extend the existing check in the actual --fd parsing in conf(),
while at it.
Suggested-by: Paul Holzinger <pholzing@redhat.com>
Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
---
v3: Handle --fd 3 case, and don't overflow if the --fd number exceeds
UINT_MAX: add an explicit check to ensure it's less than INT_MAX
v2: Move call to close_open_files() to isolate_initial()
conf.c | 3 ++-
isolation.c | 12 +++++++++---
isolation.h | 2 +-
passt.c | 2 +-
util.c | 38 ++++++++++++++++++++++++++++++++++++++
util.h | 1 +
6 files changed, 52 insertions(+), 6 deletions(-)
diff --git a/conf.c b/conf.c
index 14d8ece..5422813 100644
--- a/conf.c
+++ b/conf.c
@@ -1260,6 +1260,7 @@ void conf(struct ctx *c, int argc, char **argv)
c->tcp.fwd_in.mode = c->tcp.fwd_out.mode = FWD_UNSET;
c->udp.fwd_in.mode = c->udp.fwd_out.mode = FWD_UNSET;
+ optind = 1;
do {
name = getopt_long(argc, argv, optstring, options, NULL);
@@ -1426,7 +1427,7 @@ void conf(struct ctx *c, int argc, char **argv)
errno = 0;
c->fd_tap = strtol(optarg, NULL, 0);
- if (c->fd_tap < 0 || errno)
+ if (c->fd_tap < 0 || errno || c->fd_tap > INT_MAX)
die("Invalid --fd: %s", optarg);
c->one_off = true;
diff --git a/isolation.c b/isolation.c
index 4956d7e..45fba1e 100644
--- a/isolation.c
+++ b/isolation.c
@@ -29,7 +29,8 @@
*
* Executed immediately after startup, drops capabilities we don't
* need at any point during execution (or which we gain back when we
- * need by joining other namespaces).
+ * need by joining other namespaces), and closes any leaked file we
+ * might have inherited from the parent process.
*
* 2. isolate_user()
* =================
@@ -166,14 +167,17 @@ static void clamp_caps(void)
}
/**
- * isolate_initial() - Early, config independent self isolation
+ * isolate_initial() - Early, mostly config independent self isolation
+ * @argc: Argument count
+ * @argv: Command line options: only --fd (if present) is relevant here
*
* Should:
* - drop unneeded capabilities
+ * - close all open files except for standard streams and the one from --fd
* Musn't:
* - remove filesytem access (we need to access files during setup)
*/
-void isolate_initial(void)
+void isolate_initial(int argc, char **argv)
{
uint64_t keep;
@@ -207,6 +211,8 @@ void isolate_initial(void)
keep |= BIT(CAP_SETFCAP) | BIT(CAP_SYS_PTRACE);
drop_caps_ep_except(keep);
+
+ close_open_files(argc, argv);
}
/**
diff --git a/isolation.h b/isolation.h
index 846b2af..80bb68d 100644
--- a/isolation.h
+++ b/isolation.h
@@ -7,7 +7,7 @@
#ifndef ISOLATION_H
#define ISOLATION_H
-void isolate_initial(void);
+void isolate_initial(int argc, char **argv);
void isolate_user(uid_t uid, gid_t gid, bool use_userns, const char *userns,
enum passt_modes mode);
int isolate_prefork(const struct ctx *c);
diff --git a/passt.c b/passt.c
index ea5bece..4b3c306 100644
--- a/passt.c
+++ b/passt.c
@@ -211,7 +211,7 @@ int main(int argc, char **argv)
arch_avx2_exec(argv);
- isolate_initial();
+ isolate_initial(argc, argv);
c.pasta_netns_fd = c.fd_tap = c.pidfile_fd = -1;
diff --git a/util.c b/util.c
index 07fb21c..9c6be6a 100644
--- a/util.c
+++ b/util.c
@@ -26,6 +26,7 @@
#include <errno.h>
#include <stdbool.h>
#include <linux/errqueue.h>
+#include <getopt.h>
#include "util.h"
#include "iov.h"
@@ -694,3 +695,40 @@ const char *str_ee_origin(const struct sock_extended_err *ee)
return "<invalid>";
}
+
+/**
+ * close_open_files() - Close leaked files, but not --fd, stdin, stdout, stderr
+ * @argc: Argument count
+ * @argv: Command line options, as we need to skip any file given via --fd
+ */
+void close_open_files(int argc, char **argv)
+{
+ const struct option optfd[] = { { "fd", required_argument, NULL, 'F' },
+ { 0 },
+ };
+ long fd = -1;
+ int name;
+
+ do {
+ name = getopt_long(argc, argv, ":F", optfd, NULL);
+
+ if (name == 'F') {
+ errno = 0;
+ fd = strtol(optarg, NULL, 0);
+
+ if (fd < 0 || errno || fd > INT_MAX)
+ die("Invalid --fd: %s", optarg);
+ }
+ } while (name != -1);
+
+ if (fd == -1 || fd == 3) {
+ unsigned int first = (fd == 3) ? 4 : 3;
+
+ if (close_range(first, ~0U, CLOSE_RANGE_UNSHARE))
+ die_perror("Failed to close files leaked by parent");
+ } else {
+ if (close_range(3, fd - 1, CLOSE_RANGE_UNSHARE) ||
+ close_range(fd + 1, ~0U, CLOSE_RANGE_UNSHARE))
+ die_perror("Failed to close files leaked by parent");
+ }
+}
diff --git a/util.h b/util.h
index 83d2b53..cb4d181 100644
--- a/util.h
+++ b/util.h
@@ -183,6 +183,7 @@ int __daemon(int pidfile_fd, int devnull_fd);
int fls(unsigned long x);
int write_file(const char *path, const char *buf);
int write_remainder(int fd, const struct iovec *iov, size_t iovcnt, size_t skip);
+void close_open_files(int argc, char **argv);
/**
* af_name() - Return name of an address family
--
@@ -183,6 +183,7 @@ int __daemon(int pidfile_fd, int devnull_fd);
int fls(unsigned long x);
int write_file(const char *path, const char *buf);
int write_remainder(int fd, const struct iovec *iov, size_t iovcnt, size_t skip);
+void close_open_files(int argc, char **argv);
/**
* af_name() - Return name of an address family
--
2.43.0
^ permalink raw reply related [flat|nested] 3+ messages in thread
* Re: [PATCH v3] passt, util: Close any open file that the parent might have leaked
2024-08-07 11:11 [PATCH v3] passt, util: Close any open file that the parent might have leaked Stefano Brivio
@ 2024-08-07 11:26 ` Paul Holzinger
2024-08-07 11:37 ` Stefano Brivio
0 siblings, 1 reply; 3+ messages in thread
From: Paul Holzinger @ 2024-08-07 11:26 UTC (permalink / raw)
To: Stefano Brivio, passt-dev; +Cc: David Gibson
On 07/08/2024 13:11, Stefano Brivio wrote:
> If a parent accidentally or due to implementation reasons leaks any
> open file, we don't want to have access to them, except for the file
> passed via --fd, if any.
>
> This is the case for Podman when Podman's parent leaks files into
> Podman: it's not practical for Podman to close unrelated files before
> starting pasta, as reported by Paul.
>
> Use close_range(2) to close all open files except for standard streams
> and the one from --fd.
>
> Given that parts of conf() depend on other files to be already opened,
> such as the epoll file descriptor, we can't easily defer this to a
> more convenient point, where --fd was already parsed. Introduce a
> minimal, duplicate version of --fd parsing to keep this simple.
>
> As we need to check that the passed --fd option doesn't exceed
> INT_MAX, because we'll parse it with strtol() but file descriptor
> indices are signed ints (regardless of the arguments close_range()
> take), extend the existing check in the actual --fd parsing in conf(),
> while at it.
>
> Suggested-by: Paul Holzinger <pholzing@redhat.com>
> Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
> ---
> v3: Handle --fd 3 case, and don't overflow if the --fd number exceeds
> UINT_MAX: add an explicit check to ensure it's less than INT_MAX
>
> v2: Move call to close_open_files() to isolate_initial()
>
> conf.c | 3 ++-
> isolation.c | 12 +++++++++---
> isolation.h | 2 +-
> passt.c | 2 +-
> util.c | 38 ++++++++++++++++++++++++++++++++++++++
> util.h | 1 +
> 6 files changed, 52 insertions(+), 6 deletions(-)
>
> diff --git a/conf.c b/conf.c
> index 14d8ece..5422813 100644
> --- a/conf.c
> +++ b/conf.c
> @@ -1260,6 +1260,7 @@ void conf(struct ctx *c, int argc, char **argv)
> c->tcp.fwd_in.mode = c->tcp.fwd_out.mode = FWD_UNSET;
> c->udp.fwd_in.mode = c->udp.fwd_out.mode = FWD_UNSET;
>
> + optind = 1;
> do {
> name = getopt_long(argc, argv, optstring, options, NULL);
>
> @@ -1426,7 +1427,7 @@ void conf(struct ctx *c, int argc, char **argv)
> errno = 0;
> c->fd_tap = strtol(optarg, NULL, 0);
>
> - if (c->fd_tap < 0 || errno)
> + if (c->fd_tap < 0 || errno || c->fd_tap > INT_MAX)
> die("Invalid --fd: %s", optarg);
>
> c->one_off = true;
> diff --git a/isolation.c b/isolation.c
> index 4956d7e..45fba1e 100644
> --- a/isolation.c
> +++ b/isolation.c
> @@ -29,7 +29,8 @@
> *
> * Executed immediately after startup, drops capabilities we don't
> * need at any point during execution (or which we gain back when we
> - * need by joining other namespaces).
> + * need by joining other namespaces), and closes any leaked file we
> + * might have inherited from the parent process.
> *
> * 2. isolate_user()
> * =================
> @@ -166,14 +167,17 @@ static void clamp_caps(void)
> }
>
> /**
> - * isolate_initial() - Early, config independent self isolation
> + * isolate_initial() - Early, mostly config independent self isolation
> + * @argc: Argument count
> + * @argv: Command line options: only --fd (if present) is relevant here
> *
> * Should:
> * - drop unneeded capabilities
> + * - close all open files except for standard streams and the one from --fd
> * Musn't:
> * - remove filesytem access (we need to access files during setup)
> */
> -void isolate_initial(void)
> +void isolate_initial(int argc, char **argv)
> {
> uint64_t keep;
>
> @@ -207,6 +211,8 @@ void isolate_initial(void)
> keep |= BIT(CAP_SETFCAP) | BIT(CAP_SYS_PTRACE);
>
> drop_caps_ep_except(keep);
> +
> + close_open_files(argc, argv);
> }
>
> /**
> diff --git a/isolation.h b/isolation.h
> index 846b2af..80bb68d 100644
> --- a/isolation.h
> +++ b/isolation.h
> @@ -7,7 +7,7 @@
> #ifndef ISOLATION_H
> #define ISOLATION_H
>
> -void isolate_initial(void);
> +void isolate_initial(int argc, char **argv);
> void isolate_user(uid_t uid, gid_t gid, bool use_userns, const char *userns,
> enum passt_modes mode);
> int isolate_prefork(const struct ctx *c);
> diff --git a/passt.c b/passt.c
> index ea5bece..4b3c306 100644
> --- a/passt.c
> +++ b/passt.c
> @@ -211,7 +211,7 @@ int main(int argc, char **argv)
>
> arch_avx2_exec(argv);
>
> - isolate_initial();
> + isolate_initial(argc, argv);
>
> c.pasta_netns_fd = c.fd_tap = c.pidfile_fd = -1;
>
> diff --git a/util.c b/util.c
> index 07fb21c..9c6be6a 100644
> --- a/util.c
> +++ b/util.c
> @@ -26,6 +26,7 @@
> #include <errno.h>
> #include <stdbool.h>
> #include <linux/errqueue.h>
> +#include <getopt.h>
>
> #include "util.h"
> #include "iov.h"
> @@ -694,3 +695,40 @@ const char *str_ee_origin(const struct sock_extended_err *ee)
>
> return "<invalid>";
> }
> +
> +/**
> + * close_open_files() - Close leaked files, but not --fd, stdin, stdout, stderr
> + * @argc: Argument count
> + * @argv: Command line options, as we need to skip any file given via --fd
> + */
> +void close_open_files(int argc, char **argv)
> +{
> + const struct option optfd[] = { { "fd", required_argument, NULL, 'F' },
> + { 0 },
> + };
> + long fd = -1;
> + int name;
> +
> + do {
> + name = getopt_long(argc, argv, ":F", optfd, NULL);
> +
> + if (name == 'F') {
> + errno = 0;
> + fd = strtol(optarg, NULL, 0);
> +
> + if (fd < 0 || errno || fd > INT_MAX)
> + die("Invalid --fd: %s", optarg);
> + }
> + } while (name != -1);
> +
> + if (fd == -1 || fd == 3) {
> + unsigned int first = (fd == 3) ? 4 : 3;
> +
> + if (close_range(first, ~0U, CLOSE_RANGE_UNSHARE))
> + die_perror("Failed to close files leaked by parent");
> + } else {
> + if (close_range(3, fd - 1, CLOSE_RANGE_UNSHARE) ||
> + close_range(fd + 1, ~0U, CLOSE_RANGE_UNSHARE))
> + die_perror("Failed to close files leaked by parent");
> + }
Sorry that I didn't mentioned this before but doesn't this still fail
when given fd 0, 1 or 2? I guess it should check if (fd < 3) in the
die(Invalid --fd) case, unless someone sees a reason to allow passing
the fd on stdio fds?
> +}
> diff --git a/util.h b/util.h
> index 83d2b53..cb4d181 100644
> --- a/util.h
> +++ b/util.h
> @@ -183,6 +183,7 @@ int __daemon(int pidfile_fd, int devnull_fd);
> int fls(unsigned long x);
> int write_file(const char *path, const char *buf);
> int write_remainder(int fd, const struct iovec *iov, size_t iovcnt, size_t skip);
> +void close_open_files(int argc, char **argv);
>
> /**
> * af_name() - Return name of an address family
--
Paul Holzinger
^ permalink raw reply [flat|nested] 3+ messages in thread
* Re: [PATCH v3] passt, util: Close any open file that the parent might have leaked
2024-08-07 11:26 ` Paul Holzinger
@ 2024-08-07 11:37 ` Stefano Brivio
0 siblings, 0 replies; 3+ messages in thread
From: Stefano Brivio @ 2024-08-07 11:37 UTC (permalink / raw)
To: Paul Holzinger; +Cc: passt-dev, David Gibson
On Wed, 7 Aug 2024 13:26:32 +0200
Paul Holzinger <pholzing@redhat.com> wrote:
> On 07/08/2024 13:11, Stefano Brivio wrote:
> > If a parent accidentally or due to implementation reasons leaks any
> > open file, we don't want to have access to them, except for the file
> > passed via --fd, if any.
> >
> > This is the case for Podman when Podman's parent leaks files into
> > Podman: it's not practical for Podman to close unrelated files before
> > starting pasta, as reported by Paul.
> >
> > Use close_range(2) to close all open files except for standard streams
> > and the one from --fd.
> >
> > Given that parts of conf() depend on other files to be already opened,
> > such as the epoll file descriptor, we can't easily defer this to a
> > more convenient point, where --fd was already parsed. Introduce a
> > minimal, duplicate version of --fd parsing to keep this simple.
> >
> > As we need to check that the passed --fd option doesn't exceed
> > INT_MAX, because we'll parse it with strtol() but file descriptor
> > indices are signed ints (regardless of the arguments close_range()
> > take), extend the existing check in the actual --fd parsing in conf(),
> > while at it.
> >
> > Suggested-by: Paul Holzinger <pholzing@redhat.com>
> > Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
> > ---
> > v3: Handle --fd 3 case, and don't overflow if the --fd number exceeds
> > UINT_MAX: add an explicit check to ensure it's less than INT_MAX
> >
> > v2: Move call to close_open_files() to isolate_initial()
> >
> > conf.c | 3 ++-
> > isolation.c | 12 +++++++++---
> > isolation.h | 2 +-
> > passt.c | 2 +-
> > util.c | 38 ++++++++++++++++++++++++++++++++++++++
> > util.h | 1 +
> > 6 files changed, 52 insertions(+), 6 deletions(-)
> >
> > diff --git a/conf.c b/conf.c
> > index 14d8ece..5422813 100644
> > --- a/conf.c
> > +++ b/conf.c
> > @@ -1260,6 +1260,7 @@ void conf(struct ctx *c, int argc, char **argv)
> > c->tcp.fwd_in.mode = c->tcp.fwd_out.mode = FWD_UNSET;
> > c->udp.fwd_in.mode = c->udp.fwd_out.mode = FWD_UNSET;
> >
> > + optind = 1;
> > do {
> > name = getopt_long(argc, argv, optstring, options, NULL);
> >
> > @@ -1426,7 +1427,7 @@ void conf(struct ctx *c, int argc, char **argv)
> > errno = 0;
> > c->fd_tap = strtol(optarg, NULL, 0);
> >
> > - if (c->fd_tap < 0 || errno)
> > + if (c->fd_tap < 0 || errno || c->fd_tap > INT_MAX)
> > die("Invalid --fd: %s", optarg);
> >
> > c->one_off = true;
> > diff --git a/isolation.c b/isolation.c
> > index 4956d7e..45fba1e 100644
> > --- a/isolation.c
> > +++ b/isolation.c
> > @@ -29,7 +29,8 @@
> > *
> > * Executed immediately after startup, drops capabilities we don't
> > * need at any point during execution (or which we gain back when we
> > - * need by joining other namespaces).
> > + * need by joining other namespaces), and closes any leaked file we
> > + * might have inherited from the parent process.
> > *
> > * 2. isolate_user()
> > * =================
> > @@ -166,14 +167,17 @@ static void clamp_caps(void)
> > }
> >
> > /**
> > - * isolate_initial() - Early, config independent self isolation
> > + * isolate_initial() - Early, mostly config independent self isolation
> > + * @argc: Argument count
> > + * @argv: Command line options: only --fd (if present) is relevant here
> > *
> > * Should:
> > * - drop unneeded capabilities
> > + * - close all open files except for standard streams and the one from --fd
> > * Musn't:
> > * - remove filesytem access (we need to access files during setup)
> > */
> > -void isolate_initial(void)
> > +void isolate_initial(int argc, char **argv)
> > {
> > uint64_t keep;
> >
> > @@ -207,6 +211,8 @@ void isolate_initial(void)
> > keep |= BIT(CAP_SETFCAP) | BIT(CAP_SYS_PTRACE);
> >
> > drop_caps_ep_except(keep);
> > +
> > + close_open_files(argc, argv);
> > }
> >
> > /**
> > diff --git a/isolation.h b/isolation.h
> > index 846b2af..80bb68d 100644
> > --- a/isolation.h
> > +++ b/isolation.h
> > @@ -7,7 +7,7 @@
> > #ifndef ISOLATION_H
> > #define ISOLATION_H
> >
> > -void isolate_initial(void);
> > +void isolate_initial(int argc, char **argv);
> > void isolate_user(uid_t uid, gid_t gid, bool use_userns, const char *userns,
> > enum passt_modes mode);
> > int isolate_prefork(const struct ctx *c);
> > diff --git a/passt.c b/passt.c
> > index ea5bece..4b3c306 100644
> > --- a/passt.c
> > +++ b/passt.c
> > @@ -211,7 +211,7 @@ int main(int argc, char **argv)
> >
> > arch_avx2_exec(argv);
> >
> > - isolate_initial();
> > + isolate_initial(argc, argv);
> >
> > c.pasta_netns_fd = c.fd_tap = c.pidfile_fd = -1;
> >
> > diff --git a/util.c b/util.c
> > index 07fb21c..9c6be6a 100644
> > --- a/util.c
> > +++ b/util.c
> > @@ -26,6 +26,7 @@
> > #include <errno.h>
> > #include <stdbool.h>
> > #include <linux/errqueue.h>
> > +#include <getopt.h>
> >
> > #include "util.h"
> > #include "iov.h"
> > @@ -694,3 +695,40 @@ const char *str_ee_origin(const struct sock_extended_err *ee)
> >
> > return "<invalid>";
> > }
> > +
> > +/**
> > + * close_open_files() - Close leaked files, but not --fd, stdin, stdout, stderr
> > + * @argc: Argument count
> > + * @argv: Command line options, as we need to skip any file given via --fd
> > + */
> > +void close_open_files(int argc, char **argv)
> > +{
> > + const struct option optfd[] = { { "fd", required_argument, NULL, 'F' },
> > + { 0 },
> > + };
> > + long fd = -1;
> > + int name;
> > +
> > + do {
> > + name = getopt_long(argc, argv, ":F", optfd, NULL);
> > +
> > + if (name == 'F') {
> > + errno = 0;
> > + fd = strtol(optarg, NULL, 0);
> > +
> > + if (fd < 0 || errno || fd > INT_MAX)
> > + die("Invalid --fd: %s", optarg);
> > + }
> > + } while (name != -1);
> > +
> > + if (fd == -1 || fd == 3) {
> > + unsigned int first = (fd == 3) ? 4 : 3;
> > +
> > + if (close_range(first, ~0U, CLOSE_RANGE_UNSHARE))
> > + die_perror("Failed to close files leaked by parent");
> > + } else {
> > + if (close_range(3, fd - 1, CLOSE_RANGE_UNSHARE) ||
> > + close_range(fd + 1, ~0U, CLOSE_RANGE_UNSHARE))
> > + die_perror("Failed to close files leaked by parent");
> > + }
> Sorry that I didn't mentioned this before but doesn't this still fail
> when given fd 0, 1 or 2? I guess it should check if (fd < 3) in the
> die(Invalid --fd) case, unless someone sees a reason to allow passing
> the fd on stdio fds?
Oops, I missed your comment as I was sending v4. Right, that would also
be problematic.
I think the only semi-reasonable thing somebody could try is to close
our standard input, because we don't use it at all, and use that as
--fd. But it wouldn't work anyway at the moment, in any case where we
call __daemon(), because we would happily close that file descriptor.
Same for standard output and standard error anyway.
So, yeah, let's add the check you proposed, and should somebody ever
come up with a '--fd 0' use case, we'll handle that. v5 coming.
--
Stefano
^ permalink raw reply [flat|nested] 3+ messages in thread
end of thread, other threads:[~2024-08-07 11:37 UTC | newest]
Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2024-08-07 11:11 [PATCH v3] passt, util: Close any open file that the parent might have leaked Stefano Brivio
2024-08-07 11:26 ` Paul Holzinger
2024-08-07 11:37 ` Stefano Brivio
Code repositories for project(s) associated with this public inbox
https://passt.top/passt
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for IMAP folder(s).