public inbox for passt-dev@passt.top
 help / color / mirror / code / Atom feed
From: Laurent Vivier <lvivier@redhat.com>
To: passt-dev@passt.top
Cc: Laurent Vivier <lvivier@redhat.com>
Subject: [PATCH 20/24] vhost-user: add vhost-user
Date: Fri,  2 Feb 2024 15:11:47 +0100	[thread overview]
Message-ID: <20240202141151.3762941-21-lvivier@redhat.com> (raw)
In-Reply-To: <20240202141151.3762941-1-lvivier@redhat.com>

Add virtio and vhost-user functions to connect with QEMU. Example usage:

  $ ./passt --vhost-user

and then start QEMU with shared guest memory and a vhost-user netdev:

  # qemu-system-x86_64 ... -m 4G \
        -object memory-backend-memfd,id=memfd0,share=on,size=4G \
        -numa node,memdev=memfd0 \
        -chardev socket,id=chr0,path=/tmp/passt_1.socket \
        -netdev vhost-user,id=netdev0,chardev=chr0 \
        -device virtio-net,mac=9a:2b:2c:2d:2e:2f,netdev=netdev0 \
        ...

Signed-off-by: Laurent Vivier <lvivier@redhat.com>
---
 conf.c  | 20 ++++++++++++++--
 passt.c |  7 ++++++
 passt.h |  1 +
 tap.c   | 73 ++++++++++++++++++++++++++++++++++++++++++---------------
 tcp.c   |  8 +++++--
 udp.c   |  6 +++--
 6 files changed, 90 insertions(+), 25 deletions(-)

diff --git a/conf.c b/conf.c
index b6a2a1f0fdc3..40aa9519f8a6 100644
--- a/conf.c
+++ b/conf.c
@@ -44,6 +44,7 @@
 #include "lineread.h"
 #include "isolation.h"
 #include "log.h"
+#include "vhost_user.h"
 
 /**
  * next_chunk - Return the next piece of a string delimited by a character
@@ -735,9 +736,12 @@ static void print_usage(const char *name, int status)
 		info(   "  -I, --ns-ifname NAME	namespace interface name");
 		info(   "    default: same interface name as external one");
 	} else {
-		info(   "  -s, --socket PATH	UNIX domain socket path");
+		info(   "  -s, --socket, --socket-path PATH	UNIX domain socket path");
 		info(   "    default: probe free path starting from "
 		     UNIX_SOCK_PATH, 1);
+		info(   "  --vhost-user		Enable vhost-user mode");
+		info(   "    UNIX domain socket is provided by -s option");
+		info(   "  --print-capabilities	print back-end capabilities in JSON format");
 	}
 
 	info(   "  -F, --fd FD		Use FD as pre-opened connected socket");
@@ -1123,6 +1127,7 @@ void conf(struct ctx *c, int argc, char **argv)
 		{"help",	no_argument,		NULL,		'h' },
 		{"socket",	required_argument,	NULL,		's' },
 		{"fd",		required_argument,	NULL,		'F' },
+		{"socket-path",	required_argument,	NULL,		's' }, /* vhost-user mandatory */
 		{"ns-ifname",	required_argument,	NULL,		'I' },
 		{"pcap",	required_argument,	NULL,		'p' },
 		{"pid",		required_argument,	NULL,		'P' },
@@ -1169,6 +1174,8 @@ void conf(struct ctx *c, int argc, char **argv)
 		{"config-net",	no_argument,		NULL,		17 },
 		{"no-copy-routes", no_argument,		NULL,		18 },
 		{"no-copy-addrs", no_argument,		NULL,		19 },
+		{"vhost-user",	no_argument,		NULL,		20 },
+		{"print-capabilities", no_argument,	NULL,		21 }, /* vhost-user mandatory */
 		{ 0 },
 	};
 	char userns[PATH_MAX] = { 0 }, netns[PATH_MAX] = { 0 };
@@ -1328,7 +1335,6 @@ void conf(struct ctx *c, int argc, char **argv)
 				       sizeof(c->ip6.ifname_out), "%s", optarg);
 			if (ret <= 0 || ret >= (int)sizeof(c->ip6.ifname_out))
 				die("Invalid interface name: %s", optarg);
-
 			break;
 		case 17:
 			if (c->mode != MODE_PASTA)
@@ -1350,6 +1356,16 @@ void conf(struct ctx *c, int argc, char **argv)
 			warn("--no-copy-addrs will be dropped soon");
 			c->no_copy_addrs = copy_addrs_opt = true;
 			break;
+		case 20:
+			if (c->mode == MODE_PASTA) {
+				err("--vhost-user is for passt mode only");
+				usage(argv[0]);
+			}
+			c->mode = MODE_VU;
+			break;
+		case 21:
+			vu_print_capabilities();
+			break;
 		case 'd':
 			if (c->debug)
 				die("Multiple --debug options given");
diff --git a/passt.c b/passt.c
index 95034d73381f..952aded12848 100644
--- a/passt.c
+++ b/passt.c
@@ -282,6 +282,7 @@ int main(int argc, char **argv)
 	quit_fd = pasta_netns_quit_init(&c);
 
 	tap_sock_init(&c);
+	vu_init(&c);
 
 	secret_init(&c);
 
@@ -399,6 +400,12 @@ loop:
 		case EPOLL_TYPE_ICMPV6:
 			icmp_sock_handler(&c, AF_INET6, ref);
 			break;
+		case EPOLL_TYPE_VHOST_CMD:
+			tap_handler_vu(&c, eventmask);
+			break;
+		case EPOLL_TYPE_VHOST_KICK:
+			vu_kick_cb(&c, ref);
+			break;
 		default:
 			/* Can't happen */
 			ASSERT(0);
diff --git a/passt.h b/passt.h
index 6ed1d0b19e82..4e0100d51a4d 100644
--- a/passt.h
+++ b/passt.h
@@ -141,6 +141,7 @@ struct fqdn {
 enum passt_modes {
 	MODE_PASST,
 	MODE_PASTA,
+	MODE_VU,
 };
 
 /**
diff --git a/tap.c b/tap.c
index 936206e53637..c2a917bc00ca 100644
--- a/tap.c
+++ b/tap.c
@@ -57,6 +57,7 @@
 #include "packet.h"
 #include "tap.h"
 #include "log.h"
+#include "vhost_user.h"
 
 /* IPv4 (plus ARP) and IPv6 message batches from tap/guest to IP handlers */
 static PACKET_POOL_NOINIT(pool_tap4, TAP_MSGS, pkt_buf);
@@ -75,19 +76,22 @@ static PACKET_POOL_NOINIT(pool_tap6, TAP_MSGS, pkt_buf);
  */
 int tap_send(const struct ctx *c, const void *data, size_t len)
 {
-	pcap(data, len);
+	int flags = MSG_NOSIGNAL | MSG_DONTWAIT;
+	uint32_t vnet_len = htonl(len);
 
-	if (c->mode == MODE_PASST) {
-		int flags = MSG_NOSIGNAL | MSG_DONTWAIT;
-		uint32_t vnet_len = htonl(len);
+	pcap(data, len);
 
+	switch (c->mode) {
+	case MODE_PASST:
 		if (send(c->fd_tap, &vnet_len, 4, flags) < 0)
 			return -1;
-
 		return send(c->fd_tap, data, len, flags);
+	case MODE_PASTA:
+		return write(c->fd_tap, (char *)data, len);
+	case MODE_VU:
+		return vu_send(c, data, len);
 	}
-
-	return write(c->fd_tap, (char *)data, len);
+	return 0;
 }
 
 /**
@@ -428,10 +432,20 @@ size_t tap_send_frames(const struct ctx *c, const struct iovec *iov, size_t n)
 	if (!n)
 		return 0;
 
-	if (c->mode == MODE_PASTA)
+	switch (c->mode) {
+	case MODE_PASTA:
 		m = tap_send_frames_pasta(c, iov, n);
-	else
+		break;
+	case MODE_PASST:
 		m = tap_send_frames_passt(c, iov, n);
+		break;
+	case MODE_VU:
+		m = tap_send_frames_vu(c, iov, n);
+		break;
+	default:
+		m = 0;
+		break;
+	}
 
 	if (m < n)
 		debug("tap: failed to send %zu frames of %zu", n - m, n);
@@ -1149,11 +1163,17 @@ static void tap_sock_unix_init(struct ctx *c)
 	ev.data.u64 = ref.u64;
 	epoll_ctl(c->epollfd, EPOLL_CTL_ADD, c->fd_tap_listen, &ev);
 
-	info("You can now start qemu (>= 7.2, with commit 13c6be96618c):");
-	info("    kvm ... -device virtio-net-pci,netdev=s -netdev stream,id=s,server=off,addr.type=unix,addr.path=%s",
-	     addr.sun_path);
-	info("or qrap, for earlier qemu versions:");
-	info("    ./qrap 5 kvm ... -net socket,fd=5 -net nic,model=virtio");
+	if (c->mode == MODE_VU) {
+		info("You can start qemu with:");
+		info("    kvm ... -chardev socket,id=chr0,path=%s -netdev vhost-user,id=netdev0,chardev=chr0 -device virtio-net,netdev=netdev0 -object memory-backend-memfd,id=memfd0,share=on,size=$RAMSIZE -numa node,memdev=memfd0\n",
+		     addr.sun_path);
+	} else {
+		info("You can now start qemu (>= 7.2, with commit 13c6be96618c):");
+		info("    kvm ... -device virtio-net-pci,netdev=s -netdev stream,id=s,server=off,addr.type=unix,addr.path=%s",
+		     addr.sun_path);
+		info("or qrap, for earlier qemu versions:");
+		info("    ./qrap 5 kvm ... -net socket,fd=5 -net nic,model=virtio");
+	}
 }
 
 /**
@@ -1163,7 +1183,7 @@ static void tap_sock_unix_init(struct ctx *c)
  */
 void tap_listen_handler(struct ctx *c, uint32_t events)
 {
-	union epoll_ref ref = { .type = EPOLL_TYPE_TAP_PASST };
+	union epoll_ref ref;
 	struct epoll_event ev = { 0 };
 	int v = INT_MAX / 2;
 	struct ucred ucred;
@@ -1204,7 +1224,13 @@ void tap_listen_handler(struct ctx *c, uint32_t events)
 		trace("tap: failed to set SO_SNDBUF to %i", v);
 
 	ref.fd = c->fd_tap;
-	ev.events = EPOLLIN | EPOLLET | EPOLLRDHUP;
+	if (c->mode == MODE_VU) {
+		ref.type = EPOLL_TYPE_VHOST_CMD;
+		ev.events = EPOLLIN | EPOLLRDHUP;
+	} else {
+		ref.type = EPOLL_TYPE_TAP_PASST;
+		ev.events = EPOLLIN | EPOLLRDHUP | EPOLLET;
+	}
 	ev.data.u64 = ref.u64;
 	epoll_ctl(c->epollfd, EPOLL_CTL_ADD, c->fd_tap, &ev);
 }
@@ -1288,12 +1314,21 @@ void tap_sock_init(struct ctx *c)
 
 		ASSERT(c->one_off);
 		ref.fd = c->fd_tap;
-		if (c->mode == MODE_PASST)
+		switch (c->mode) {
+		case MODE_PASST:
 			ref.type = EPOLL_TYPE_TAP_PASST;
-		else
+			ev.events = EPOLLIN | EPOLLET | EPOLLRDHUP;
+			break;
+		case MODE_PASTA:
 			ref.type = EPOLL_TYPE_TAP_PASTA;
+			ev.events = EPOLLIN | EPOLLET | EPOLLRDHUP;
+			break;
+		case MODE_VU:
+			ref.type = EPOLL_TYPE_VHOST_CMD;
+			ev.events = EPOLLIN | EPOLLRDHUP;
+			break;
+		}
 
-		ev.events = EPOLLIN | EPOLLET | EPOLLRDHUP;
 		ev.data.u64 = ref.u64;
 		epoll_ctl(c->epollfd, EPOLL_CTL_ADD, c->fd_tap, &ev);
 		return;
diff --git a/tcp.c b/tcp.c
index 54c15087d678..b6aca9f37f19 100644
--- a/tcp.c
+++ b/tcp.c
@@ -1033,7 +1033,9 @@ size_t ipv4_fill_headers(const struct ctx *c,
 
 	tcp_set_tcp_header(th, conn, seq);
 
-	th->check = tcp_update_check_tcp4(iph);
+	th->check = 0;
+	if (c->mode != MODE_VU || *c->pcap)
+		th->check = tcp_update_check_tcp4(iph);
 
 	return ip_len;
 }
@@ -1069,7 +1071,9 @@ size_t ipv6_fill_headers(const struct ctx *c,
 
 	tcp_set_tcp_header(th, conn, seq);
 
-	th->check = tcp_update_check_tcp6(ip6h);
+	th->check = 0;
+	if (c->mode != MODE_VU || *c->pcap)
+		th->check = tcp_update_check_tcp6(ip6h);
 
 	ip6h->hop_limit = 255;
 	ip6h->version = 6;
diff --git a/udp.c b/udp.c
index a189c2e0b5a2..799a10989a91 100644
--- a/udp.c
+++ b/udp.c
@@ -671,8 +671,10 @@ static size_t udp_update_hdr6(const struct ctx *c, struct ipv6hdr *ip6h,
 	uh->source = s_in6->sin6_port;
 	uh->dest = htons(dstport);
 	uh->len = ip6h->payload_len;
-	uh->check = csum(uh, ntohs(ip6h->payload_len),
-			 proto_ipv6_header_checksum(ip6h, IPPROTO_UDP));
+	uh->check = 0;
+	if (c->mode != MODE_VU || *c->pcap)
+		uh->check = csum(uh, ntohs(ip6h->payload_len),
+				 proto_ipv6_header_checksum(ip6h, IPPROTO_UDP));
 	ip6h->version = 6;
 	ip6h->nexthdr = IPPROTO_UDP;
 	ip6h->hop_limit = 255;
-- 
2.42.0


  parent reply	other threads:[~2024-02-02 14:11 UTC|newest]

Thread overview: 83+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-02-02 14:11 [PATCH 00/24] Add vhost-user support to passt Laurent Vivier
2024-02-02 14:11 ` [PATCH 01/24] iov: add some functions to manage iovec Laurent Vivier
2024-02-05  5:57   ` David Gibson
2024-02-06 14:28     ` Laurent Vivier
2024-02-07  1:01       ` David Gibson
2024-02-07 10:00         ` Laurent Vivier
2024-02-06 16:10   ` Stefano Brivio
2024-02-07 14:02     ` Laurent Vivier
2024-02-07 14:57       ` Stefano Brivio
2024-02-02 14:11 ` [PATCH 02/24] pcap: add pcap_iov() Laurent Vivier
2024-02-05  6:25   ` David Gibson
2024-02-06 16:10   ` Stefano Brivio
2024-02-02 14:11 ` [PATCH 03/24] checksum: align buffers Laurent Vivier
2024-02-05  6:02   ` David Gibson
2024-02-07  9:01     ` Stefano Brivio
2024-02-02 14:11 ` [PATCH 04/24] checksum: add csum_iov() Laurent Vivier
2024-02-05  6:07   ` David Gibson
2024-02-07  9:02   ` Stefano Brivio
2024-02-02 14:11 ` [PATCH 05/24] util: move IP stuff from util.[ch] to ip.[ch] Laurent Vivier
2024-02-05  6:13   ` David Gibson
2024-02-07  9:03     ` Stefano Brivio
2024-02-08  0:04       ` David Gibson
2024-02-02 14:11 ` [PATCH 06/24] ip: move duplicate IPv4 checksum function to ip.h Laurent Vivier
2024-02-05  6:16   ` David Gibson
2024-02-07 10:40   ` Stefano Brivio
2024-02-07 23:43     ` David Gibson
2024-02-02 14:11 ` [PATCH 07/24] ip: introduce functions to compute the header part checksum for TCP/UDP Laurent Vivier
2024-02-05  6:20   ` David Gibson
2024-02-07 10:41   ` Stefano Brivio
2024-02-02 14:11 ` [PATCH 08/24] tcp: extract buffer management from tcp_send_flag() Laurent Vivier
2024-02-06  0:24   ` David Gibson
2024-02-08 16:57   ` Stefano Brivio
2024-02-02 14:11 ` [PATCH 09/24] tcp: extract buffer management from tcp_conn_tap_mss() Laurent Vivier
2024-02-06  0:47   ` David Gibson
2024-02-08 16:59   ` Stefano Brivio
2024-02-02 14:11 ` [PATCH 10/24] tcp: rename functions that manage buffers Laurent Vivier
2024-02-06  1:48   ` David Gibson
2024-02-08 17:10     ` Stefano Brivio
2024-02-02 14:11 ` [PATCH 11/24] tcp: move buffers management functions to their own file Laurent Vivier
2024-02-02 14:11 ` [PATCH 12/24] tap: make tap_update_mac() generic Laurent Vivier
2024-02-06  1:49   ` David Gibson
2024-02-08 17:10     ` Stefano Brivio
2024-02-09  5:02       ` David Gibson
2024-02-02 14:11 ` [PATCH 13/24] tap: export pool_flush()/tapX_handler()/packet_add() Laurent Vivier
2024-02-02 14:29   ` Laurent Vivier
2024-02-06  1:52   ` David Gibson
2024-02-11 23:15   ` Stefano Brivio
2024-02-12  2:22     ` David Gibson
2024-02-02 14:11 ` [PATCH 14/24] udp: move udpX_l2_buf_t and udpX_l2_mh_sock out of udp_update_hdrX() Laurent Vivier
2024-02-06  1:59   ` David Gibson
2024-02-11 23:16   ` Stefano Brivio
2024-02-02 14:11 ` [PATCH 15/24] udp: rename udp_sock_handler() to udp_buf_sock_handler() Laurent Vivier
2024-02-06  2:14   ` David Gibson
2024-02-11 23:17     ` Stefano Brivio
2024-02-02 14:11 ` [PATCH 16/24] packet: replace struct desc by struct iovec Laurent Vivier
2024-02-06  2:25   ` David Gibson
2024-02-11 23:18     ` Stefano Brivio
2024-02-02 14:11 ` [PATCH 17/24] vhost-user: compare mode MODE_PASTA and not MODE_PASST Laurent Vivier
2024-02-06  2:29   ` David Gibson
2024-02-02 14:11 ` [PATCH 18/24] vhost-user: introduce virtio API Laurent Vivier
2024-02-06  3:51   ` David Gibson
2024-02-11 23:18     ` Stefano Brivio
2024-02-12  2:26       ` David Gibson
2024-02-02 14:11 ` [PATCH 19/24] vhost-user: introduce vhost-user API Laurent Vivier
2024-02-07  2:13   ` David Gibson
2024-02-02 14:11 ` Laurent Vivier [this message]
2024-02-07  2:40   ` [PATCH 20/24] vhost-user: add vhost-user David Gibson
2024-02-11 23:19     ` Stefano Brivio
2024-02-12  2:47       ` David Gibson
2024-02-13 15:22         ` Stefano Brivio
2024-02-14  2:05           ` David Gibson
2024-02-11 23:19   ` Stefano Brivio
2024-02-12  2:49     ` David Gibson
2024-02-12 10:02       ` Laurent Vivier
2024-02-12 16:56         ` Stefano Brivio
2024-02-02 14:11 ` [PATCH 21/24] vhost-user: use guest buffer directly in vu_handle_tx() Laurent Vivier
2024-02-09  4:26   ` David Gibson
2024-02-02 14:11 ` [PATCH 22/24] tcp: vhost-user RX nocopy Laurent Vivier
2024-02-09  4:57   ` David Gibson
2024-02-02 14:11 ` [PATCH 23/24] udp: " Laurent Vivier
2024-02-09  5:00   ` David Gibson
2024-02-02 14:11 ` [PATCH 24/24] vhost-user: remove tap_send_frames_vu() Laurent Vivier
2024-02-09  5:01   ` David Gibson

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20240202141151.3762941-21-lvivier@redhat.com \
    --to=lvivier@redhat.com \
    --cc=passt-dev@passt.top \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link.

Be sure your reply has a Subject: header at the top and a blank line
before the message body.

Code repositories for project(s) associated with this public inbox

	https://passt.top/passt

This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for IMAP folder(s).