public inbox for passt-dev@passt.top
 help / color / mirror / code / Atom feed
From: David Gibson <david@gibson.dropbear.id.au>
To: passt-dev@passt.top, Stefano Brivio <sbrivio@redhat.com>
Cc: David Gibson <david@gibson.dropbear.id.au>
Subject: [PATCH v2 06/13] epoll: Always use epoll_ref for the epoll data variable
Date: Thu, 10 Aug 2023 12:33:08 +1000	[thread overview]
Message-ID: <20230810023315.684784-7-david@gibson.dropbear.id.au> (raw)
In-Reply-To: <20230810023315.684784-1-david@gibson.dropbear.id.au>

epoll_ref contains a variety of information useful when handling epoll
events on our sockets, and we place it in the epoll_event data field
returned by epoll.  However, for a few other things we use the 'fd' field
in the standard union of types for that data field.

This actually introduces a bug which is vanishingly unlikely to hit in
practice, but very nasty if it ever did: theoretically if we had a very
large file descriptor number for fd_tap or fd_tap_listen it could overflow
into bits that overlap with the 'proto' field in epoll_ref.  With some
very bad luck this could mean that we mistakenly think an event on a
regular socket is an event on fd_tap or fd_tap_listen.

More practically, using different (but overlapping) fields of the
epoll_data means we can't unify dispatch for the various different objects
in the epoll.  Therefore use the same epoll_ref as the data for the tap
fds and the netns quit fd, adding new fd type values to describe them.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
---
 passt.c | 11 ++++++-----
 passt.h |  6 +++++-
 pasta.c |  8 ++++++--
 tap.c   | 16 ++++++++++++----
 4 files changed, 29 insertions(+), 12 deletions(-)

diff --git a/passt.c b/passt.c
index ae69478..45e9fbf 100644
--- a/passt.c
+++ b/passt.c
@@ -60,6 +60,8 @@ char *epoll_type_str[EPOLL_TYPE_MAX + 1] = {
 	[EPOLL_TYPE_UDP]	= "UDP socket",
 	[EPOLL_TYPE_ICMP]	= "ICMP socket",
 	[EPOLL_TYPE_ICMPV6]	= "ICMPv6 socket",
+	[EPOLL_TYPE_NSQUIT]	= "namespace inotify",
+	[EPOLL_TYPE_TAP]	= "tap device",
 };
 
 /**
@@ -328,12 +330,11 @@ loop:
 
 	for (i = 0; i < nfds; i++) {
 		union epoll_ref ref = *((union epoll_ref *)&events[i].data.u64);
-		int fd = events[i].data.fd;
 
-		if (fd == c.fd_tap || fd == c.fd_tap_listen)
-			tap_handler(&c, fd, events[i].events, &now);
-		else if (fd == quit_fd)
-			pasta_netns_quit_handler(&c, fd);
+		if (ref.type == EPOLL_TYPE_TAP)
+			tap_handler(&c, ref.fd, events[i].events, &now);
+		else if (ref.type == EPOLL_TYPE_NSQUIT)
+			pasta_netns_quit_handler(&c, quit_fd);
 		else
 			sock_handler(&c, ref, events[i].events, &now);
 	}
diff --git a/passt.h b/passt.h
index 2110781..8878a11 100644
--- a/passt.h
+++ b/passt.h
@@ -55,8 +55,12 @@ enum epoll_type {
 	EPOLL_TYPE_ICMP,
 	/* ICMPv6 sockets */
 	EPOLL_TYPE_ICMPV6,
+	/* inotify fd watching for end of netns (pasta) */
+	EPOLL_TYPE_NSQUIT,
+	/* tap char device, or qemu socket fd */
+	EPOLL_TYPE_TAP,
 
-	EPOLL_TYPE_MAX = EPOLL_TYPE_ICMPV6,
+	EPOLL_TYPE_MAX = EPOLL_TYPE_TAP,
 };
 
 /**
diff --git a/pasta.c b/pasta.c
index 1e84680..dbe8e8c 100644
--- a/pasta.c
+++ b/pasta.c
@@ -365,7 +365,10 @@ void pasta_ns_conf(struct ctx *c)
 int pasta_netns_quit_init(struct ctx *c)
 {
 	int flags = O_NONBLOCK | O_CLOEXEC;
-	struct epoll_event ev = { .events = EPOLLIN };
+	union epoll_ref ref = { .type = EPOLL_TYPE_NSQUIT };
+	struct epoll_event ev = {
+		.events = EPOLLIN
+	};
 	int inotify_fd;
 
 	if (c->mode != MODE_PASTA || c->no_netns_quit || !*c->netns_base)
@@ -381,7 +384,8 @@ int pasta_netns_quit_init(struct ctx *c)
 		return -1;
 	}
 
-	ev.data.fd = inotify_fd;
+	ref.fd = inotify_fd;
+	ev.data.u64 = ref.u64;
 	epoll_ctl(c->epollfd, EPOLL_CTL_ADD, inotify_fd, &ev);
 
 	return inotify_fd;
diff --git a/tap.c b/tap.c
index 501af33..6c3ce48 100644
--- a/tap.c
+++ b/tap.c
@@ -1076,6 +1076,7 @@ restart:
 static void tap_sock_unix_init(struct ctx *c)
 {
 	int fd = socket(AF_UNIX, SOCK_STREAM, 0);
+	union epoll_ref ref = { .type = EPOLL_TYPE_TAP };
 	struct epoll_event ev = { 0 };
 	struct sockaddr_un addr = {
 		.sun_family = AF_UNIX,
@@ -1128,8 +1129,9 @@ static void tap_sock_unix_init(struct ctx *c)
 
 	listen(fd, 0);
 
-	ev.data.fd = c->fd_tap_listen = fd;
+	ref.fd = c->fd_tap_listen = fd;
 	ev.events = EPOLLIN | EPOLLERR | EPOLLET;
+	ev.data.u64 = ref.u64;
 	epoll_ctl(c->epollfd, EPOLL_CTL_ADD, c->fd_tap_listen, &ev);
 
 	info("You can now start qemu (>= 7.2, with commit 13c6be96618c):");
@@ -1146,6 +1148,7 @@ static void tap_sock_unix_init(struct ctx *c)
  */
 static void tap_sock_unix_new(struct ctx *c, uint32_t events)
 {
+	union epoll_ref ref = { .type = EPOLL_TYPE_TAP };
 	struct epoll_event ev = { 0 };
 	int v = INT_MAX / 2;
 	struct ucred ucred;
@@ -1185,8 +1188,9 @@ static void tap_sock_unix_new(struct ctx *c, uint32_t events)
 	    setsockopt(c->fd_tap, SOL_SOCKET, SO_SNDBUF, &v, sizeof(v)))
 		trace("tap: failed to set SO_SNDBUF to %i", v);
 
-	ev.data.fd = c->fd_tap;
+	ref.fd = c->fd_tap;
 	ev.events = EPOLLIN | EPOLLET | EPOLLRDHUP;
+	ev.data.u64 = ref.u64;
 	epoll_ctl(c->epollfd, EPOLL_CTL_ADD, c->fd_tap, &ev);
 }
 
@@ -1231,6 +1235,7 @@ static int tap_ns_tun(void *arg)
  */
 static void tap_sock_tun_init(struct ctx *c)
 {
+	union epoll_ref ref = { .type = EPOLL_TYPE_TAP };
 	struct epoll_event ev = { 0 };
 
 	NS_CALL(tap_ns_tun, c);
@@ -1239,8 +1244,9 @@ static void tap_sock_tun_init(struct ctx *c)
 
 	pasta_ns_conf(c);
 
-	ev.data.fd = c->fd_tap;
+	ref.fd = c->fd_tap;
 	ev.events = EPOLLIN | EPOLLRDHUP;
+	ev.data.u64 = ref.u64;
 	epoll_ctl(c->epollfd, EPOLL_CTL_ADD, c->fd_tap, &ev);
 }
 
@@ -1262,11 +1268,13 @@ void tap_sock_init(struct ctx *c)
 	}
 
 	if (c->fd_tap != -1) { /* Passed as --fd */
+		union epoll_ref ref = { .type = EPOLL_TYPE_TAP };
 		struct epoll_event ev = { 0 };
 		ASSERT(c->one_off);
 
-		ev.data.fd = c->fd_tap;
+		ref.fd = c->fd_tap;
 		ev.events = EPOLLIN | EPOLLET | EPOLLRDHUP;
+		ev.data.u64 = ref.u64;
 		epoll_ctl(c->epollfd, EPOLL_CTL_ADD, c->fd_tap, &ev);
 		return;
 	}
-- 
@@ -1076,6 +1076,7 @@ restart:
 static void tap_sock_unix_init(struct ctx *c)
 {
 	int fd = socket(AF_UNIX, SOCK_STREAM, 0);
+	union epoll_ref ref = { .type = EPOLL_TYPE_TAP };
 	struct epoll_event ev = { 0 };
 	struct sockaddr_un addr = {
 		.sun_family = AF_UNIX,
@@ -1128,8 +1129,9 @@ static void tap_sock_unix_init(struct ctx *c)
 
 	listen(fd, 0);
 
-	ev.data.fd = c->fd_tap_listen = fd;
+	ref.fd = c->fd_tap_listen = fd;
 	ev.events = EPOLLIN | EPOLLERR | EPOLLET;
+	ev.data.u64 = ref.u64;
 	epoll_ctl(c->epollfd, EPOLL_CTL_ADD, c->fd_tap_listen, &ev);
 
 	info("You can now start qemu (>= 7.2, with commit 13c6be96618c):");
@@ -1146,6 +1148,7 @@ static void tap_sock_unix_init(struct ctx *c)
  */
 static void tap_sock_unix_new(struct ctx *c, uint32_t events)
 {
+	union epoll_ref ref = { .type = EPOLL_TYPE_TAP };
 	struct epoll_event ev = { 0 };
 	int v = INT_MAX / 2;
 	struct ucred ucred;
@@ -1185,8 +1188,9 @@ static void tap_sock_unix_new(struct ctx *c, uint32_t events)
 	    setsockopt(c->fd_tap, SOL_SOCKET, SO_SNDBUF, &v, sizeof(v)))
 		trace("tap: failed to set SO_SNDBUF to %i", v);
 
-	ev.data.fd = c->fd_tap;
+	ref.fd = c->fd_tap;
 	ev.events = EPOLLIN | EPOLLET | EPOLLRDHUP;
+	ev.data.u64 = ref.u64;
 	epoll_ctl(c->epollfd, EPOLL_CTL_ADD, c->fd_tap, &ev);
 }
 
@@ -1231,6 +1235,7 @@ static int tap_ns_tun(void *arg)
  */
 static void tap_sock_tun_init(struct ctx *c)
 {
+	union epoll_ref ref = { .type = EPOLL_TYPE_TAP };
 	struct epoll_event ev = { 0 };
 
 	NS_CALL(tap_ns_tun, c);
@@ -1239,8 +1244,9 @@ static void tap_sock_tun_init(struct ctx *c)
 
 	pasta_ns_conf(c);
 
-	ev.data.fd = c->fd_tap;
+	ref.fd = c->fd_tap;
 	ev.events = EPOLLIN | EPOLLRDHUP;
+	ev.data.u64 = ref.u64;
 	epoll_ctl(c->epollfd, EPOLL_CTL_ADD, c->fd_tap, &ev);
 }
 
@@ -1262,11 +1268,13 @@ void tap_sock_init(struct ctx *c)
 	}
 
 	if (c->fd_tap != -1) { /* Passed as --fd */
+		union epoll_ref ref = { .type = EPOLL_TYPE_TAP };
 		struct epoll_event ev = { 0 };
 		ASSERT(c->one_off);
 
-		ev.data.fd = c->fd_tap;
+		ref.fd = c->fd_tap;
 		ev.events = EPOLLIN | EPOLLET | EPOLLRDHUP;
+		ev.data.u64 = ref.u64;
 		epoll_ctl(c->epollfd, EPOLL_CTL_ADD, c->fd_tap, &ev);
 		return;
 	}
-- 
2.41.0


  parent reply	other threads:[~2023-08-10  2:33 UTC|newest]

Thread overview: 18+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-08-10  2:33 [PATCH v2 00/13] Clean up to tap errors and epoll dispatch David Gibson
2023-08-10  2:33 ` [PATCH v2 01/13] tap: Clean up tap reset path David Gibson
2023-08-10  2:33 ` [PATCH v2 02/13] tap: Clean up behaviour for errors on listening Unix socket David Gibson
2023-08-10  2:33 ` [PATCH v2 03/13] tap: Fold reset handling into tap_handler_pasta() David Gibson
2023-08-10  2:33 ` [PATCH v2 04/13] tap: Fold reset handling into tap_handler_passt() David Gibson
2023-08-10 19:49   ` Stefano Brivio
2023-08-11  3:07     ` David Gibson
2023-08-10  2:33 ` [PATCH v2 05/13] epoll: Generalize epoll_ref to cover things other than sockets David Gibson
2023-08-10  2:33 ` David Gibson [this message]
2023-08-10  2:33 ` [PATCH v2 07/13] epoll: Fold sock_handler into general switch on epoll event fd David Gibson
2023-08-10 19:49   ` Stefano Brivio
2023-08-11  3:11     ` David Gibson
2023-08-10  2:33 ` [PATCH v2 08/13] epoll: Split handling of ICMP and ICMPv6 sockets David Gibson
2023-08-10  2:33 ` [PATCH v2 09/13] epoll: Tiny cleanup to udp_sock_handler() David Gibson
2023-08-10  2:33 ` [PATCH v2 10/13] epoll: Split handling of TCP timerfds into its own handler function David Gibson
2023-08-10  2:33 ` [PATCH v2 11/13] epoll: Split handling of listening TCP sockets into their own handler David Gibson
2023-08-10  2:33 ` [PATCH v2 12/13] epoll: Split listening Unix domain socket into its own type David Gibson
2023-08-10  2:33 ` [PATCH v2 13/13] epoll: Use different epoll types for passt and pasta tap fds David Gibson

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20230810023315.684784-7-david@gibson.dropbear.id.au \
    --to=david@gibson.dropbear.id.au \
    --cc=passt-dev@passt.top \
    --cc=sbrivio@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
Code repositories for project(s) associated with this public inbox

	https://passt.top/passt

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for IMAP folder(s).