public inbox for passt-dev@passt.top
 help / color / mirror / code / Atom feed
* [PATCH] passt: introduce --fd-is-tap to allow passing TAP file descriptor
@ 2023-09-15 14:21 Nikolay Edigaryev
  2023-09-16 12:34 ` Stefano Brivio
  0 siblings, 1 reply; 6+ messages in thread
From: Nikolay Edigaryev @ 2023-09-15 14:21 UTC (permalink / raw)
  To: passt-dev; +Cc: Nikolay Edigaryev

Problem: I have a Cloud Hypervisor virtual machine that needs both
(1) an internet access without fiddling with iptables/Netfilter and
(2) VM <-> host access (to be able to provision this VM over SSH)
without dealing with passt port forwarding it doesn't seem to be
possible to map the whole IP address, yet the users expect an IP
instead of IP:port combination.

Requirement #1 is why I've choosen passt and it's pretty much
satisfied (thank you for this great piece of software!).

Requirement #2 implies some kind of bridge interface on the host
with one TAP interface for the VM and the other for the passt.

However, only pasta can accept TAP interface FD's in it's -F/--fd,
which is OK, but it also configures unneeded namespacing, which in
turn results in unneeded complexity and performance overhead due
to the need of involving veth pairs to break away from the pasta
namespace to the host for the requirement #2 to be satisfied.

I've also considered proxying the UNIX domain socket communication
to/from a TAP interface in my own Golang code, but it incurs
significant performance overhead.

On the other hand passt seems to already can do everything I need,
it just needs some guidance on which type of FD it's dealing with.

Solution: introduce --fd-is-tap command-line flag to tell passt
which type of FD it's being passed to and force it to use appropriate
system calls and offset calculation.

This patch also clarifies the -F/--fd description for pasta to note
that we're expecting a TAP device and not a UNIX domain socket.
---
 README.md |  4 ++++
 conf.c    | 14 +++++++++++++-
 passt.c   |  2 ++
 passt.h   |  1 +
 tap.c     |  8 +++++---
 tap.h     |  4 ++--
 6 files changed, 27 insertions(+), 6 deletions(-)

diff --git a/README.md b/README.md
index 6d00313..a78288f 100644
--- a/README.md
+++ b/README.md
@@ -381,6 +381,10 @@ descriptor that's already opened.
 This approach, compared to using a _tap_ device, doesn't require any security
 capabilities, as we don't need to create any interface.
 
+However, if you already have a _tap_ device opened by other means, you can
+specify `--fd-is-tap` command-line option and _passt_ will treat the file
+descriptor passed in `-F`/`--fd` option as a pre-opened TAP device.
+
 _pasta_ runs out of the box with any recent (post-3.8) Linux kernel.
 
 ## Services
diff --git a/conf.c b/conf.c
index 0ad6e23..d622fdf 100644
--- a/conf.c
+++ b/conf.c
@@ -803,7 +803,12 @@ static void print_usage(const char *name, int status)
 		     UNIX_SOCK_PATH, 1);
 	}
 
-	info(   "  -F, --fd FD		Use FD as pre-opened connected socket");
+	if (strstr(name, "pasta")) {
+		info(   "  -F, --fd FD		Use FD as pre-opened TAP device");
+	} else {
+		info(   "  -F, --fd FD		Use FD as pre-opened and connected UNIX domain socket");
+		info(   "  --fd-is-tap		Treat FD as pre-opened TAP device instead of connected UNIX domain socket");
+	}
 	info(   "  -p, --pcap FILE	Log tap-facing traffic to pcap file");
 	info(   "  -P, --pid FILE	Write own PID to the given file");
 	info(   "  -m, --mtu MTU	Assign MTU via DHCP/NDP");
@@ -1232,6 +1237,7 @@ void conf(struct ctx *c, int argc, char **argv)
 		{"config-net",	no_argument,		NULL,		17 },
 		{"no-copy-routes", no_argument,		NULL,		18 },
 		{"no-copy-addrs", no_argument,		NULL,		19 },
+		{"fd-is-tap",	no_argument,		NULL,		20 },
 		{ 0 },
 	};
 	struct get_bound_ports_ns_arg ns_ports_arg = { .c = c };
@@ -1411,6 +1417,12 @@ void conf(struct ctx *c, int argc, char **argv)
 			warn("--no-copy-addrs will be dropped soon");
 			c->no_copy_addrs = copy_addrs_opt = true;
 			break;
+		case 20:
+			if (c->mode != MODE_PASST)
+				die("--fd-is-tap is for passt mode only");
+
+			c->fd_tap_is_socket = false;
+			break;
 		case 'd':
 			if (c->debug)
 				die("Multiple --debug options given");
diff --git a/passt.c b/passt.c
index 8ddd9b3..b7276ff 100644
--- a/passt.c
+++ b/passt.c
@@ -195,9 +195,11 @@ int main(int argc, char **argv)
 		}
 
 		c.mode = MODE_PASTA;
+		c.fd_tap_is_socket = false;
 		log_name = "pasta";
 	} else if (strstr(name, "passt")) {
 		c.mode = MODE_PASST;
+		c.fd_tap_is_socket = true;
 		log_name = "passt";
 	} else {
 		exit(EXIT_FAILURE);
diff --git a/passt.h b/passt.h
index 282bd1a..2079cd0 100644
--- a/passt.h
+++ b/passt.h
@@ -264,6 +264,7 @@ struct ctx {
 	int epollfd;
 	int fd_tap_listen;
 	int fd_tap;
+	bool fd_tap_is_socket;
 	unsigned char mac[ETH_ALEN];
 	unsigned char mac_guest[ETH_ALEN];
 
diff --git a/tap.c b/tap.c
index 93db989..12b66ca 100644
--- a/tap.c
+++ b/tap.c
@@ -76,7 +76,7 @@ int tap_send(const struct ctx *c, const void *data, size_t len)
 {
 	pcap(data, len);
 
-	if (c->mode == MODE_PASST) {
+	if (c->fd_tap_is_socket) {
 		int flags = MSG_NOSIGNAL | MSG_DONTWAIT;
 		uint32_t vnet_len = htonl(len);
 
@@ -421,7 +421,7 @@ void tap_send_frames(struct ctx *c, const struct iovec *iov, size_t n)
 	if (!n)
 		return;
 
-	if (c->mode == MODE_PASST)
+	if (c->fd_tap_is_socket)
 		m = tap_send_frames_passt(c, iov, n);
 	else
 		m = tap_send_frames_pasta(c, iov, n);
@@ -1176,6 +1176,7 @@ void tap_listen_handler(struct ctx *c, uint32_t events)
 	}
 
 	c->fd_tap = accept4(c->fd_tap_listen, NULL, NULL, 0);
+	c->fd_tap_is_socket = true;
 
 	if (!getsockopt(c->fd_tap, SOL_SOCKET, SO_PEERCRED, &ucred, &len))
 		info("accepted connection from PID %i", ucred.pid);
@@ -1225,6 +1226,7 @@ static int tap_ns_tun(void *arg)
 		die("Tap device opened but no network interface found");
 
 	c->fd_tap = fd;
+	c->fd_tap_is_socket = false;
 
 	return 0;
 }
@@ -1273,7 +1275,7 @@ void tap_sock_init(struct ctx *c)
 
 		ASSERT(c->one_off);
 		ref.fd = c->fd_tap;
-		if (c->mode == MODE_PASST)
+		if (c->fd_tap_is_socket)
 			ref.type = EPOLL_TYPE_TAP_PASST;
 		else
 			ref.type = EPOLL_TYPE_TAP_PASTA;
diff --git a/tap.h b/tap.h
index 021fb7c..3626e49 100644
--- a/tap.h
+++ b/tap.h
@@ -20,7 +20,7 @@ struct tap_hdr {
 
 static inline size_t tap_hdr_len_(const struct ctx *c)
 {
-	if (c->mode == MODE_PASST)
+	if (c->fd_tap_is_socket)
 		return sizeof(struct tap_hdr);
 	else
 		return sizeof(struct ethhdr);
@@ -52,7 +52,7 @@ static inline void *tap_iov_base(const struct ctx *c, struct tap_hdr *taph)
 static inline size_t tap_iov_len(const struct ctx *c, struct tap_hdr *taph,
 				 size_t plen)
 {
-	if (c->mode == MODE_PASST)
+	if (c->fd_tap_is_socket)
 		taph->vnet_len = htonl(plen + sizeof(taph->eh));
 	return plen + tap_hdr_len_(c);
 }
-- 
@@ -20,7 +20,7 @@ struct tap_hdr {
 
 static inline size_t tap_hdr_len_(const struct ctx *c)
 {
-	if (c->mode == MODE_PASST)
+	if (c->fd_tap_is_socket)
 		return sizeof(struct tap_hdr);
 	else
 		return sizeof(struct ethhdr);
@@ -52,7 +52,7 @@ static inline void *tap_iov_base(const struct ctx *c, struct tap_hdr *taph)
 static inline size_t tap_iov_len(const struct ctx *c, struct tap_hdr *taph,
 				 size_t plen)
 {
-	if (c->mode == MODE_PASST)
+	if (c->fd_tap_is_socket)
 		taph->vnet_len = htonl(plen + sizeof(taph->eh));
 	return plen + tap_hdr_len_(c);
 }
-- 
2.39.2 (Apple Git-144)


^ permalink raw reply related	[flat|nested] 6+ messages in thread

* Re: [PATCH] passt: introduce --fd-is-tap to allow passing TAP file descriptor
  2023-09-15 14:21 [PATCH] passt: introduce --fd-is-tap to allow passing TAP file descriptor Nikolay Edigaryev
@ 2023-09-16 12:34 ` Stefano Brivio
  2023-09-18  2:23   ` David Gibson
                     ` (2 more replies)
  0 siblings, 3 replies; 6+ messages in thread
From: Stefano Brivio @ 2023-09-16 12:34 UTC (permalink / raw)
  To: Nikolay Edigaryev; +Cc: passt-dev, David Gibson

Hi Nikolay,

On Fri, 15 Sep 2023 18:21:52 +0400
Nikolay Edigaryev <edigaryev@gmail.com> wrote:

> Problem: I have a Cloud Hypervisor virtual machine that needs both
> (1) an internet access without fiddling with iptables/Netfilter and
> (2) VM <-> host access (to be able to provision this VM over SSH)
> without dealing with passt port forwarding it doesn't seem to be
> possible to map the whole IP address, yet the users expect an IP
> instead of IP:port combination.
> 
> Requirement #1 is why I've choosen passt and it's pretty much
> satisfied (thank you for this great piece of software!).

And thanks for the patches! I'm glad to hear it's useful for you (and
with Cloud Hypervisor :)).

Two comments:

> Requirement #2 implies some kind of bridge interface on the host
> with one TAP interface for the VM and the other for the passt.
> 
> However, only pasta can accept TAP interface FD's in it's -F/--fd,
> which is OK, but it also configures unneeded namespacing, which in
> turn results in unneeded complexity and performance overhead due
> to the need of involving veth pairs to break away from the pasta
> namespace to the host for the requirement #2 to be satisfied.
> 
> I've also considered proxying the UNIX domain socket communication
> to/from a TAP interface in my own Golang code, but it incurs
> significant performance overhead.
> 
> On the other hand passt seems to already can do everything I need,
> it just needs some guidance on which type of FD it's dealing with.
> 
> Solution: introduce --fd-is-tap command-line flag to tell passt
> which type of FD it's being passed to and force it to use appropriate
> system calls and offset calculation.

Did you consider adding another parameter altogether, such as --tap-fd?

I'm asking because we recently got a request to add another (similar)
interface on that "side", that is, a VSOCK file descriptor, for usage
with podman-machine. At that point, a further --fd-is-vsock would look
a bit awkward.

Further, David Gibson is working on a generalised flow table approach
which *should* also allow us to have multiple "taps"... and at that
point, somebody might want to pass multiple "--tap-fd" or -F options.

I didn't really evaluate if there are drawbacks to that, though --
maybe it's a lot more code.

> This patch also clarifies the -F/--fd description for pasta to note
> that we're expecting a TAP device and not a UNIX domain socket.

You should add a Signed-off-by tag here (but in general I can fix up
tags myself on merge). Other than that, the patch looks good to me in a
general sense.

-- 
Stefano


^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] passt: introduce --fd-is-tap to allow passing TAP file descriptor
  2023-09-16 12:34 ` Stefano Brivio
@ 2023-09-18  2:23   ` David Gibson
  2023-09-18 13:37   ` [PATCH] passt: introduce --tap-fd " Nikolay Edigaryev
  2023-09-18 13:44   ` [PATCH] passt: introduce --fd-is-tap " Nikolay Edigaryev
  2 siblings, 0 replies; 6+ messages in thread
From: David Gibson @ 2023-09-18  2:23 UTC (permalink / raw)
  To: Stefano Brivio; +Cc: Nikolay Edigaryev, passt-dev

[-- Attachment #1: Type: text/plain, Size: 3211 bytes --]

On Sat, Sep 16, 2023 at 02:34:06PM +0200, Stefano Brivio wrote:
> Hi Nikolay,
> 
> On Fri, 15 Sep 2023 18:21:52 +0400
> Nikolay Edigaryev <edigaryev@gmail.com> wrote:
> 
> > Problem: I have a Cloud Hypervisor virtual machine that needs both
> > (1) an internet access without fiddling with iptables/Netfilter and
> > (2) VM <-> host access (to be able to provision this VM over SSH)
> > without dealing with passt port forwarding it doesn't seem to be
> > possible to map the whole IP address, yet the users expect an IP
> > instead of IP:port combination.
> > 
> > Requirement #1 is why I've choosen passt and it's pretty much
> > satisfied (thank you for this great piece of software!).
> 
> And thanks for the patches! I'm glad to hear it's useful for you (and
> with Cloud Hypervisor :)).
> 
> Two comments:
> 
> > Requirement #2 implies some kind of bridge interface on the host
> > with one TAP interface for the VM and the other for the passt.
> > 
> > However, only pasta can accept TAP interface FD's in it's -F/--fd,
> > which is OK, but it also configures unneeded namespacing, which in
> > turn results in unneeded complexity and performance overhead due
> > to the need of involving veth pairs to break away from the pasta
> > namespace to the host for the requirement #2 to be satisfied.
> > 
> > I've also considered proxying the UNIX domain socket communication
> > to/from a TAP interface in my own Golang code, but it incurs
> > significant performance overhead.
> > 
> > On the other hand passt seems to already can do everything I need,
> > it just needs some guidance on which type of FD it's dealing with.
> > 
> > Solution: introduce --fd-is-tap command-line flag to tell passt
> > which type of FD it's being passed to and force it to use appropriate
> > system calls and offset calculation.
> 
> Did you consider adding another parameter altogether, such as --tap-fd?
> 
> I'm asking because we recently got a request to add another (similar)
> interface on that "side", that is, a VSOCK file descriptor, for usage
> with podman-machine. At that point, a further --fd-is-vsock would look
> a bit awkward.
> 
> Further, David Gibson is working on a generalised flow table approach
> which *should* also allow us to have multiple "taps"... and at that
> point, somebody might want to pass multiple "--tap-fd" or -F options.
> 
> I didn't really evaluate if there are drawbacks to that, though --
> maybe it's a lot more code.


I second that point.  I think having a different option for passing an
fd is a much better interface design than having a secondary option
which affects the interpretation of another one.

> > This patch also clarifies the -F/--fd description for pasta to note
> > that we're expecting a TAP device and not a UNIX domain socket.
> 
> You should add a Signed-off-by tag here (but in general I can fix up
> tags myself on merge). Other than that, the patch looks good to me in a
> general sense.
> 

-- 
David Gibson			| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you.  NOT _the_ _other_
				| _way_ _around_!
http://www.ozlabs.org/~dgibson

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply	[flat|nested] 6+ messages in thread

* [PATCH] passt: introduce --tap-fd to allow passing TAP file descriptor
  2023-09-16 12:34 ` Stefano Brivio
  2023-09-18  2:23   ` David Gibson
@ 2023-09-18 13:37   ` Nikolay Edigaryev
  2023-09-19  0:45     ` David Gibson
  2023-09-18 13:44   ` [PATCH] passt: introduce --fd-is-tap " Nikolay Edigaryev
  2 siblings, 1 reply; 6+ messages in thread
From: Nikolay Edigaryev @ 2023-09-18 13:37 UTC (permalink / raw)
  To: passt-dev; +Cc: Nikolay Edigaryev

Problem: I have a Cloud Hypervisor virtual machine that needs both
(1) an internet access without fiddling with iptables/Netfilter and
(2) VM <-> host access (to be able to provision this VM over SSH
without dealing with passt port forwarding, as it doesn't seem to be
possible to map the whole IP address, yet the users expect an IP
instead of IP:port combination).

Requirement #1 is why I've choosen passt and it's pretty much
satisfied (thank you for this great piece of software!).

Requirement #2 implies some kind of bridge interface on the host
with one TAP interface for the VM and the other for the passt.

However, only pasta can accept TAP interface FD's in it's -F/--fd,
which is OK, but it also configures unneeded namespacing, which in
turn results in unneeded complexity and performance overhead due
to the need of involving veth pairs to break away from the pasta
namespace to the host for the requirement #2 to be satisfied.

I've also considered proxying the UNIX domain socket communication
to/from a TAP interface in my own Golang code, but it incurs
significant performance overhead.

On the other hand passt seems to already can do everything I need,
it just needs some guidance on which type of FD it's dealing with.

Solution: introduce --tap-fd command-line flag to tell passt that we're
passing a TAP FD instead of a UNIX domain socket FD, which will in turn
use appropriate system calls and offset calculaton for that FD.

This patch also clarifies the -F/--fd description for pasta to note
that we're expecting a TAP device and not a UNIX domain socket.

Signed-off-by: Nikolay Edigaryev <edigaryev@gmail.com>
---
 README.md |  5 +++++
 conf.c    | 37 ++++++++++++++++++++++++++++++++++---
 passt.c   |  2 ++
 passt.h   |  1 +
 tap.c     |  8 +++++---
 tap.h     |  4 ++--
 6 files changed, 49 insertions(+), 8 deletions(-)

diff --git a/README.md b/README.md
index 6d00313..760720c 100644
--- a/README.md
+++ b/README.md
@@ -381,6 +381,11 @@ descriptor that's already opened.
 This approach, compared to using a _tap_ device, doesn't require any security
 capabilities, as we don't need to create any interface.
 
+However, if you already have a _tap_ device opened by other means, you can
+pass it in the `--tap-fd` command-line option and _passt_ will use correct
+system calls and won't send or expect additional QEMU-specific headers for
+each packet as it does with the UNIX domain socket.
+
 _pasta_ runs out of the box with any recent (post-3.8) Linux kernel.
 
 ## Services
diff --git a/conf.c b/conf.c
index 0ad6e23..b182904 100644
--- a/conf.c
+++ b/conf.c
@@ -803,7 +803,12 @@ static void print_usage(const char *name, int status)
 		     UNIX_SOCK_PATH, 1);
 	}
 
-	info(   "  -F, --fd FD		Use FD as pre-opened connected socket");
+	if (strstr(name, "pasta")) {
+		info(   "  -F, --fd FD		Use FD as pre-opened TAP device");
+	} else {
+		info(   "  -F, --fd FD		Use FD as pre-opened and connected UNIX domain socket");
+		info(   "  --tap-fd		Use FD as pre-opened TAP device");
+	}
 	info(   "  -p, --pcap FILE	Log tap-facing traffic to pcap file");
 	info(   "  -P, --pid FILE	Write own PID to the given file");
 	info(   "  -m, --mtu MTU	Assign MTU via DHCP/NDP");
@@ -1232,6 +1237,7 @@ void conf(struct ctx *c, int argc, char **argv)
 		{"config-net",	no_argument,		NULL,		17 },
 		{"no-copy-routes", no_argument,		NULL,		18 },
 		{"no-copy-addrs", no_argument,		NULL,		19 },
+		{"tap-fd", required_argument,		NULL,		20 },
 		{ 0 },
 	};
 	struct get_bound_ports_ns_arg ns_ports_arg = { .c = c };
@@ -1410,6 +1416,27 @@ void conf(struct ctx *c, int argc, char **argv)
 
 			warn("--no-copy-addrs will be dropped soon");
 			c->no_copy_addrs = copy_addrs_opt = true;
+			break;
+		case 20:
+			if (c->mode != MODE_PASST)
+				die("--tap-fd is for passt mode only");
+
+			if (c->fd_tap >= 0) {
+				if (c->mode == MODE_PASTA)
+					die("Multiple --fd options given");
+				else
+					die("Multiple --fd/--tap-fd options given");
+			}
+
+			errno = 0;
+			c->fd_tap = strtol(optarg, NULL, 0);
+
+			if (c->fd_tap < 0 || errno)
+				die("Invalid --tap-fd: %s", optarg);
+
+			c->one_off = true;
+			c->fd_tap_is_socket = false;
+
 			break;
 		case 'd':
 			if (c->debug)
@@ -1464,8 +1491,12 @@ void conf(struct ctx *c, int argc, char **argv)
 
 			break;
 		case 'F':
-			if (c->fd_tap >= 0)
-				die("Multiple --fd options given");
+			if (c->fd_tap >= 0) {
+				if (c->mode == MODE_PASTA)
+					die("Multiple --fd options given");
+				else
+					die("Multiple --fd/--tap-fd options given");
+			}
 
 			errno = 0;
 			c->fd_tap = strtol(optarg, NULL, 0);
diff --git a/passt.c b/passt.c
index 8ddd9b3..b7276ff 100644
--- a/passt.c
+++ b/passt.c
@@ -195,9 +195,11 @@ int main(int argc, char **argv)
 		}
 
 		c.mode = MODE_PASTA;
+		c.fd_tap_is_socket = false;
 		log_name = "pasta";
 	} else if (strstr(name, "passt")) {
 		c.mode = MODE_PASST;
+		c.fd_tap_is_socket = true;
 		log_name = "passt";
 	} else {
 		exit(EXIT_FAILURE);
diff --git a/passt.h b/passt.h
index 282bd1a..2079cd0 100644
--- a/passt.h
+++ b/passt.h
@@ -264,6 +264,7 @@ struct ctx {
 	int epollfd;
 	int fd_tap_listen;
 	int fd_tap;
+	bool fd_tap_is_socket;
 	unsigned char mac[ETH_ALEN];
 	unsigned char mac_guest[ETH_ALEN];
 
diff --git a/tap.c b/tap.c
index 93db989..12b66ca 100644
--- a/tap.c
+++ b/tap.c
@@ -76,7 +76,7 @@ int tap_send(const struct ctx *c, const void *data, size_t len)
 {
 	pcap(data, len);
 
-	if (c->mode == MODE_PASST) {
+	if (c->fd_tap_is_socket) {
 		int flags = MSG_NOSIGNAL | MSG_DONTWAIT;
 		uint32_t vnet_len = htonl(len);
 
@@ -421,7 +421,7 @@ void tap_send_frames(struct ctx *c, const struct iovec *iov, size_t n)
 	if (!n)
 		return;
 
-	if (c->mode == MODE_PASST)
+	if (c->fd_tap_is_socket)
 		m = tap_send_frames_passt(c, iov, n);
 	else
 		m = tap_send_frames_pasta(c, iov, n);
@@ -1176,6 +1176,7 @@ void tap_listen_handler(struct ctx *c, uint32_t events)
 	}
 
 	c->fd_tap = accept4(c->fd_tap_listen, NULL, NULL, 0);
+	c->fd_tap_is_socket = true;
 
 	if (!getsockopt(c->fd_tap, SOL_SOCKET, SO_PEERCRED, &ucred, &len))
 		info("accepted connection from PID %i", ucred.pid);
@@ -1225,6 +1226,7 @@ static int tap_ns_tun(void *arg)
 		die("Tap device opened but no network interface found");
 
 	c->fd_tap = fd;
+	c->fd_tap_is_socket = false;
 
 	return 0;
 }
@@ -1273,7 +1275,7 @@ void tap_sock_init(struct ctx *c)
 
 		ASSERT(c->one_off);
 		ref.fd = c->fd_tap;
-		if (c->mode == MODE_PASST)
+		if (c->fd_tap_is_socket)
 			ref.type = EPOLL_TYPE_TAP_PASST;
 		else
 			ref.type = EPOLL_TYPE_TAP_PASTA;
diff --git a/tap.h b/tap.h
index 021fb7c..3626e49 100644
--- a/tap.h
+++ b/tap.h
@@ -20,7 +20,7 @@ struct tap_hdr {
 
 static inline size_t tap_hdr_len_(const struct ctx *c)
 {
-	if (c->mode == MODE_PASST)
+	if (c->fd_tap_is_socket)
 		return sizeof(struct tap_hdr);
 	else
 		return sizeof(struct ethhdr);
@@ -52,7 +52,7 @@ static inline void *tap_iov_base(const struct ctx *c, struct tap_hdr *taph)
 static inline size_t tap_iov_len(const struct ctx *c, struct tap_hdr *taph,
 				 size_t plen)
 {
-	if (c->mode == MODE_PASST)
+	if (c->fd_tap_is_socket)
 		taph->vnet_len = htonl(plen + sizeof(taph->eh));
 	return plen + tap_hdr_len_(c);
 }
-- 
@@ -20,7 +20,7 @@ struct tap_hdr {
 
 static inline size_t tap_hdr_len_(const struct ctx *c)
 {
-	if (c->mode == MODE_PASST)
+	if (c->fd_tap_is_socket)
 		return sizeof(struct tap_hdr);
 	else
 		return sizeof(struct ethhdr);
@@ -52,7 +52,7 @@ static inline void *tap_iov_base(const struct ctx *c, struct tap_hdr *taph)
 static inline size_t tap_iov_len(const struct ctx *c, struct tap_hdr *taph,
 				 size_t plen)
 {
-	if (c->mode == MODE_PASST)
+	if (c->fd_tap_is_socket)
 		taph->vnet_len = htonl(plen + sizeof(taph->eh));
 	return plen + tap_hdr_len_(c);
 }
-- 
2.39.2 (Apple Git-144)


^ permalink raw reply related	[flat|nested] 6+ messages in thread

* Re: [PATCH] passt: introduce --fd-is-tap to allow passing TAP file descriptor
  2023-09-16 12:34 ` Stefano Brivio
  2023-09-18  2:23   ` David Gibson
  2023-09-18 13:37   ` [PATCH] passt: introduce --tap-fd " Nikolay Edigaryev
@ 2023-09-18 13:44   ` Nikolay Edigaryev
  2 siblings, 0 replies; 6+ messages in thread
From: Nikolay Edigaryev @ 2023-09-18 13:44 UTC (permalink / raw)
  To: Stefano Brivio; +Cc: passt-dev, David Gibson

Stefano, David, thanks for the input!

I've submitted a new patch completely replacing the previous one
(because I forgot to add --signoff), but only changes the command-line
arguments part as suggested.

The rest is kept the same because I'm not sure if introducing more
than one FD in the passt's context will make sense at this time.

On Sat, Sep 16, 2023 at 4:34 PM Stefano Brivio <sbrivio@redhat.com> wrote:
>
> Hi Nikolay,
>
> On Fri, 15 Sep 2023 18:21:52 +0400
> Nikolay Edigaryev <edigaryev@gmail.com> wrote:
>
> > Problem: I have a Cloud Hypervisor virtual machine that needs both
> > (1) an internet access without fiddling with iptables/Netfilter and
> > (2) VM <-> host access (to be able to provision this VM over SSH)
> > without dealing with passt port forwarding it doesn't seem to be
> > possible to map the whole IP address, yet the users expect an IP
> > instead of IP:port combination.
> >
> > Requirement #1 is why I've choosen passt and it's pretty much
> > satisfied (thank you for this great piece of software!).
>
> And thanks for the patches! I'm glad to hear it's useful for you (and
> with Cloud Hypervisor :)).
>
> Two comments:
>
> > Requirement #2 implies some kind of bridge interface on the host
> > with one TAP interface for the VM and the other for the passt.
> >
> > However, only pasta can accept TAP interface FD's in it's -F/--fd,
> > which is OK, but it also configures unneeded namespacing, which in
> > turn results in unneeded complexity and performance overhead due
> > to the need of involving veth pairs to break away from the pasta
> > namespace to the host for the requirement #2 to be satisfied.
> >
> > I've also considered proxying the UNIX domain socket communication
> > to/from a TAP interface in my own Golang code, but it incurs
> > significant performance overhead.
> >
> > On the other hand passt seems to already can do everything I need,
> > it just needs some guidance on which type of FD it's dealing with.
> >
> > Solution: introduce --fd-is-tap command-line flag to tell passt
> > which type of FD it's being passed to and force it to use appropriate
> > system calls and offset calculation.
>
> Did you consider adding another parameter altogether, such as --tap-fd?
>
> I'm asking because we recently got a request to add another (similar)
> interface on that "side", that is, a VSOCK file descriptor, for usage
> with podman-machine. At that point, a further --fd-is-vsock would look
> a bit awkward.
>
> Further, David Gibson is working on a generalised flow table approach
> which *should* also allow us to have multiple "taps"... and at that
> point, somebody might want to pass multiple "--tap-fd" or -F options.
>
> I didn't really evaluate if there are drawbacks to that, though --
> maybe it's a lot more code.
>
> > This patch also clarifies the -F/--fd description for pasta to note
> > that we're expecting a TAP device and not a UNIX domain socket.
>
> You should add a Signed-off-by tag here (but in general I can fix up
> tags myself on merge). Other than that, the patch looks good to me in a
> general sense.
>
> --
> Stefano
>

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] passt: introduce --tap-fd to allow passing TAP file descriptor
  2023-09-18 13:37   ` [PATCH] passt: introduce --tap-fd " Nikolay Edigaryev
@ 2023-09-19  0:45     ` David Gibson
  0 siblings, 0 replies; 6+ messages in thread
From: David Gibson @ 2023-09-19  0:45 UTC (permalink / raw)
  To: Nikolay Edigaryev; +Cc: passt-dev

[-- Attachment #1: Type: text/plain, Size: 10471 bytes --]

On Mon, Sep 18, 2023 at 05:37:21PM +0400, Nikolay Edigaryev wrote:
> Problem: I have a Cloud Hypervisor virtual machine that needs both
> (1) an internet access without fiddling with iptables/Netfilter and
> (2) VM <-> host access (to be able to provision this VM over SSH
> without dealing with passt port forwarding, as it doesn't seem to be
> possible to map the whole IP address, yet the users expect an IP
> instead of IP:port combination).
> 
> Requirement #1 is why I've choosen passt and it's pretty much
> satisfied (thank you for this great piece of software!).
> 
> Requirement #2 implies some kind of bridge interface on the host
> with one TAP interface for the VM and the other for the passt.

Huh.. ok.  I hadn't quite registered that part of this is you're using
a tap device, rather than qemu socket, for *passt* rather than pasta,
which is not something we've tried before.  I'm slightly surprised
that you didn't more problems than these.

Fwiw, I do think the fact that tap-vs-socket is currently tied to
pasta-vs-passt is not really correct and have plans to remove that
assumption more cleanly.  That's a middling way down the track though,
so I'm not necessarily opposed to a stopgap approach.  If possible I'd
like to keep the user-visible options in such a way that they don't
make implementing that future approach harder, of course.

> However, only pasta can accept TAP interface FD's in it's -F/--fd,
> which is OK, but it also configures unneeded namespacing, which in
> turn results in unneeded complexity and performance overhead due
> to the need of involving veth pairs to break away from the pasta
> namespace to the host for the requirement #2 to be satisfied.

Huh, ok, I hadn't considered that angle.

A couple of preliminary thoughts:

 * Since #2 (AFAICT) requires root on the host to set up the bridge,
   etc., why is using passt still advantageous over just doing all the
   networking over a bridge?

 * What do you need from #2 that isn't covered by passt's map-gw
   limited NAT behaviour?

> I've also considered proxying the UNIX domain socket communication
> to/from a TAP interface in my own Golang code, but it incurs
> significant performance overhead.

Yeah, that sounds messy and counter-productive.

> On the other hand passt seems to already can do everything I need,
> it just needs some guidance on which type of FD it's dealing with.
> 
> Solution: introduce --tap-fd command-line flag to tell passt that we're
> passing a TAP FD instead of a UNIX domain socket FD, which will in turn
> use appropriate system calls and offset calculaton for that FD.
> 
> This patch also clarifies the -F/--fd description for pasta to note
> that we're expecting a TAP device and not a UNIX domain socket.
> 
> Signed-off-by: Nikolay Edigaryev <edigaryev@gmail.com>
> ---
>  README.md |  5 +++++
>  conf.c    | 37 ++++++++++++++++++++++++++++++++++---
>  passt.c   |  2 ++
>  passt.h   |  1 +
>  tap.c     |  8 +++++---
>  tap.h     |  4 ++--
>  6 files changed, 49 insertions(+), 8 deletions(-)
> 
> diff --git a/README.md b/README.md
> index 6d00313..760720c 100644
> --- a/README.md
> +++ b/README.md
> @@ -381,6 +381,11 @@ descriptor that's already opened.
>  This approach, compared to using a _tap_ device, doesn't require any security
>  capabilities, as we don't need to create any interface.
>  
> +However, if you already have a _tap_ device opened by other means, you can
> +pass it in the `--tap-fd` command-line option and _passt_ will use correct
> +system calls and won't send or expect additional QEMU-specific headers for
> +each packet as it does with the UNIX domain socket.
> +

I can follow what this is saying in the context of this commit, but
I'm not sure it will really make sense solely in the context of where
it sits in the README.  Not immediately sure how to improve it, alas.

>  _pasta_ runs out of the box with any recent (post-3.8) Linux kernel.
>  
>  ## Services
> diff --git a/conf.c b/conf.c
> index 0ad6e23..b182904 100644
> --- a/conf.c
> +++ b/conf.c
> @@ -803,7 +803,12 @@ static void print_usage(const char *name, int status)
>  		     UNIX_SOCK_PATH, 1);
>  	}
>  
> -	info(   "  -F, --fd FD		Use FD as pre-opened connected socket");
> +	if (strstr(name, "pasta")) {
> +		info(   "  -F, --fd FD		Use FD as pre-opened TAP device");
> +	} else {
> +		info(   "  -F, --fd FD		Use FD as pre-opened and connected UNIX domain socket");
> +		info(   "  --tap-fd		Use FD as pre-opened TAP device");
> +	}

Huh... so, I didn't fully think through this suggestion in the context
of this being for passt rather than pasta.  This kind of highlights a
bit of ugliness: -F for pasta and -F for passt have different
meanings, but -F for pasta has the same meaning as --tap-fd for passt.
Sort of, anyway.

Reconsidering this, I think a better approach would be to use just a
single -F/--fd option, but extend the format of the 'FD' string
itself.  So, say:
	-F tap:17
vs
	-F qsock:17

If the fd type isn't specified, we can default to one or the other
differently for pasta and passt, to maintain compatibility.  Because
the fd is just digits, the various cases shouldn't have any ambiguity.
I think that notion of a "type-tagged" fd could also be made use of in
the future extensions I have in mind.

Stefano, does that seem reasonable to you?

>  	info(   "  -p, --pcap FILE	Log tap-facing traffic to pcap file");
>  	info(   "  -P, --pid FILE	Write own PID to the given file");
>  	info(   "  -m, --mtu MTU	Assign MTU via DHCP/NDP");
> @@ -1232,6 +1237,7 @@ void conf(struct ctx *c, int argc, char **argv)
>  		{"config-net",	no_argument,		NULL,		17 },
>  		{"no-copy-routes", no_argument,		NULL,		18 },
>  		{"no-copy-addrs", no_argument,		NULL,		19 },
> +		{"tap-fd", required_argument,		NULL,		20 },
>  		{ 0 },
>  	};
>  	struct get_bound_ports_ns_arg ns_ports_arg = { .c = c };
> @@ -1410,6 +1416,27 @@ void conf(struct ctx *c, int argc, char **argv)
>  
>  			warn("--no-copy-addrs will be dropped soon");
>  			c->no_copy_addrs = copy_addrs_opt = true;
> +			break;
> +		case 20:
> +			if (c->mode != MODE_PASST)
> +				die("--tap-fd is for passt mode only");
> +
> +			if (c->fd_tap >= 0) {
> +				if (c->mode == MODE_PASTA)
> +					die("Multiple --fd options given");
> +				else
> +					die("Multiple --fd/--tap-fd options given");
> +			}
> +
> +			errno = 0;
> +			c->fd_tap = strtol(optarg, NULL, 0);
> +
> +			if (c->fd_tap < 0 || errno)
> +				die("Invalid --tap-fd: %s", optarg);
> +
> +			c->one_off = true;
> +			c->fd_tap_is_socket = false;

This effectively global boolean flag is ugly, but because it's not
directly user visible, I'm ok with that for the time being.  I'm happy
to subsume it in more general changes once I get to them.

> +
>  			break;
>  		case 'd':
>  			if (c->debug)
> @@ -1464,8 +1491,12 @@ void conf(struct ctx *c, int argc, char **argv)
>  
>  			break;
>  		case 'F':
> -			if (c->fd_tap >= 0)
> -				die("Multiple --fd options given");
> +			if (c->fd_tap >= 0) {
> +				if (c->mode == MODE_PASTA)
> +					die("Multiple --fd options given");
> +				else
> +					die("Multiple --fd/--tap-fd options given");
> +			}
>  
>  			errno = 0;
>  			c->fd_tap = strtol(optarg, NULL, 0);
> diff --git a/passt.c b/passt.c
> index 8ddd9b3..b7276ff 100644
> --- a/passt.c
> +++ b/passt.c
> @@ -195,9 +195,11 @@ int main(int argc, char **argv)
>  		}
>  
>  		c.mode = MODE_PASTA;
> +		c.fd_tap_is_socket = false;
>  		log_name = "pasta";
>  	} else if (strstr(name, "passt")) {
>  		c.mode = MODE_PASST;
> +		c.fd_tap_is_socket = true;
>  		log_name = "passt";
>  	} else {
>  		exit(EXIT_FAILURE);
> diff --git a/passt.h b/passt.h
> index 282bd1a..2079cd0 100644
> --- a/passt.h
> +++ b/passt.h
> @@ -264,6 +264,7 @@ struct ctx {
>  	int epollfd;
>  	int fd_tap_listen;
>  	int fd_tap;
> +	bool fd_tap_is_socket;
>  	unsigned char mac[ETH_ALEN];
>  	unsigned char mac_guest[ETH_ALEN];
>  
> diff --git a/tap.c b/tap.c
> index 93db989..12b66ca 100644
> --- a/tap.c
> +++ b/tap.c
> @@ -76,7 +76,7 @@ int tap_send(const struct ctx *c, const void *data, size_t len)
>  {
>  	pcap(data, len);
>  
> -	if (c->mode == MODE_PASST) {
> +	if (c->fd_tap_is_socket) {
>  		int flags = MSG_NOSIGNAL | MSG_DONTWAIT;
>  		uint32_t vnet_len = htonl(len);
>  
> @@ -421,7 +421,7 @@ void tap_send_frames(struct ctx *c, const struct iovec *iov, size_t n)
>  	if (!n)
>  		return;
>  
> -	if (c->mode == MODE_PASST)
> +	if (c->fd_tap_is_socket)
>  		m = tap_send_frames_passt(c, iov, n);
>  	else
>  		m = tap_send_frames_pasta(c, iov, n);
> @@ -1176,6 +1176,7 @@ void tap_listen_handler(struct ctx *c, uint32_t events)
>  	}
>  
>  	c->fd_tap = accept4(c->fd_tap_listen, NULL, NULL, 0);
> +	c->fd_tap_is_socket = true;
>  
>  	if (!getsockopt(c->fd_tap, SOL_SOCKET, SO_PEERCRED, &ucred, &len))
>  		info("accepted connection from PID %i", ucred.pid);
> @@ -1225,6 +1226,7 @@ static int tap_ns_tun(void *arg)
>  		die("Tap device opened but no network interface found");
>  
>  	c->fd_tap = fd;
> +	c->fd_tap_is_socket = false;
>  
>  	return 0;
>  }
> @@ -1273,7 +1275,7 @@ void tap_sock_init(struct ctx *c)
>  
>  		ASSERT(c->one_off);
>  		ref.fd = c->fd_tap;
> -		if (c->mode == MODE_PASST)
> +		if (c->fd_tap_is_socket)
>  			ref.type = EPOLL_TYPE_TAP_PASST;
>  		else
>  			ref.type = EPOLL_TYPE_TAP_PASTA;
> diff --git a/tap.h b/tap.h
> index 021fb7c..3626e49 100644
> --- a/tap.h
> +++ b/tap.h
> @@ -20,7 +20,7 @@ struct tap_hdr {
>  
>  static inline size_t tap_hdr_len_(const struct ctx *c)
>  {
> -	if (c->mode == MODE_PASST)
> +	if (c->fd_tap_is_socket)
>  		return sizeof(struct tap_hdr);
>  	else
>  		return sizeof(struct ethhdr);
> @@ -52,7 +52,7 @@ static inline void *tap_iov_base(const struct ctx *c, struct tap_hdr *taph)
>  static inline size_t tap_iov_len(const struct ctx *c, struct tap_hdr *taph,
>  				 size_t plen)
>  {
> -	if (c->mode == MODE_PASST)
> +	if (c->fd_tap_is_socket)
>  		taph->vnet_len = htonl(plen + sizeof(taph->eh));
>  	return plen + tap_hdr_len_(c);
>  }

-- 
David Gibson			| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you.  NOT _the_ _other_
				| _way_ _around_!
http://www.ozlabs.org/~dgibson

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2023-09-19  0:46 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-09-15 14:21 [PATCH] passt: introduce --fd-is-tap to allow passing TAP file descriptor Nikolay Edigaryev
2023-09-16 12:34 ` Stefano Brivio
2023-09-18  2:23   ` David Gibson
2023-09-18 13:37   ` [PATCH] passt: introduce --tap-fd " Nikolay Edigaryev
2023-09-19  0:45     ` David Gibson
2023-09-18 13:44   ` [PATCH] passt: introduce --fd-is-tap " Nikolay Edigaryev

Code repositories for project(s) associated with this public inbox

	https://passt.top/passt

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for IMAP folder(s).