From: Laurent Vivier <lvivier@redhat.com>
To: passt-dev@passt.top
Cc: Laurent Vivier <lvivier@redhat.com>
Subject: [PATCH 23/24] udp: vhost-user RX nocopy
Date: Fri, 2 Feb 2024 15:11:50 +0100 [thread overview]
Message-ID: <20240202141151.3762941-24-lvivier@redhat.com> (raw)
In-Reply-To: <20240202141151.3762941-1-lvivier@redhat.com>
Signed-off-by: Laurent Vivier <lvivier@redhat.com>
---
Makefile | 4 +-
passt.c | 5 +-
passt.h | 1 +
udp.c | 23 +++---
udp_internal.h | 21 +++++
udp_vu.c | 215 +++++++++++++++++++++++++++++++++++++++++++++++++
udp_vu.h | 8 ++
7 files changed, 262 insertions(+), 15 deletions(-)
create mode 100644 udp_internal.h
create mode 100644 udp_vu.c
create mode 100644 udp_vu.h
diff --git a/Makefile b/Makefile
index f7a403d19b61..1d2b5dbfe085 100644
--- a/Makefile
+++ b/Makefile
@@ -47,7 +47,7 @@ FLAGS += -DDUAL_STACK_SOCKETS=$(DUAL_STACK_SOCKETS)
PASST_SRCS = arch.c arp.c checksum.c conf.c dhcp.c dhcpv6.c flow.c icmp.c \
igmp.c isolation.c lineread.c log.c mld.c ndp.c netlink.c packet.c \
passt.c pasta.c pcap.c pif.c port_fwd.c tap.c tcp.c tcp_splice.c \
- tcp_buf.c tcp_vu.c udp.c util.c iov.c ip.c virtio.c vhost_user.c
+ tcp_buf.c tcp_vu.c udp.c udp_vu.c util.c iov.c ip.c virtio.c vhost_user.c
QRAP_SRCS = qrap.c
SRCS = $(PASST_SRCS) $(QRAP_SRCS)
@@ -57,7 +57,7 @@ PASST_HEADERS = arch.h arp.h checksum.h conf.h dhcp.h dhcpv6.h flow.h \
flow_table.h icmp.h inany.h isolation.h lineread.h log.h ndp.h \
netlink.h packet.h passt.h pasta.h pcap.h pif.h port_fwd.h siphash.h \
tap.h tcp.h tcp_conn.h tcp_splice.h tcp_buf.h tcp_vu.h tcp_internal.h \
- udp.h util.h iov.h ip.h virtio.h vhost_user.h
+ udp.h udp_internal.h udp_vu.h util.h iov.h ip.h virtio.h vhost_user.h
HEADERS = $(PASST_HEADERS) seccomp.h
C := \#include <linux/tcp.h>\nstruct tcp_info x = { .tcpi_snd_wnd = 0 };
diff --git a/passt.c b/passt.c
index 952aded12848..a5abd5c4fc03 100644
--- a/passt.c
+++ b/passt.c
@@ -392,7 +392,10 @@ loop:
tcp_timer_handler(&c, ref);
break;
case EPOLL_TYPE_UDP:
- udp_buf_sock_handler(&c, ref, eventmask, &now);
+ if (c.mode == MODE_VU)
+ udp_vu_sock_handler(&c, ref, eventmask, &now);
+ else
+ udp_buf_sock_handler(&c, ref, eventmask, &now);
break;
case EPOLL_TYPE_ICMP:
icmp_sock_handler(&c, AF_INET, ref);
diff --git a/passt.h b/passt.h
index 4e0100d51a4d..04f4af8fd72e 100644
--- a/passt.h
+++ b/passt.h
@@ -42,6 +42,7 @@ union epoll_ref;
#include "port_fwd.h"
#include "tcp.h"
#include "udp.h"
+#include "udp_vu.h"
#include "vhost_user.h"
/**
diff --git a/udp.c b/udp.c
index 799a10989a91..da67d0cfa46b 100644
--- a/udp.c
+++ b/udp.c
@@ -117,9 +117,7 @@
#include "tap.h"
#include "pcap.h"
#include "log.h"
-
-#define UDP_CONN_TIMEOUT 180 /* s, timeout for ephemeral or local bind */
-#define UDP_MAX_FRAMES 32 /* max # of frames to receive at once */
+#include "udp_internal.h"
/**
* struct udp_tap_port - Port tracking based on tap-facing source port
@@ -227,11 +225,11 @@ static struct mmsghdr udp6_l2_mh_sock [UDP_MAX_FRAMES];
static struct iovec udp4_iov_splice [UDP_MAX_FRAMES];
static struct iovec udp6_iov_splice [UDP_MAX_FRAMES];
-static struct sockaddr_in udp4_localname = {
+struct sockaddr_in udp4_localname = {
.sin_family = AF_INET,
.sin_addr = IN4ADDR_LOOPBACK_INIT,
};
-static struct sockaddr_in6 udp6_localname = {
+struct sockaddr_in6 udp6_localname = {
.sin6_family = AF_INET6,
.sin6_addr = IN6ADDR_LOOPBACK_INIT,
};
@@ -562,9 +560,9 @@ static void udp_splice_sendfrom(const struct ctx *c, unsigned start, unsigned n,
*
* Return: size of tap frame with headers
*/
-static size_t udp_update_hdr4(const struct ctx *c, struct iphdr *iph,
- size_t data_len, struct sockaddr_in *s_in,
- in_port_t dstport, const struct timespec *now)
+size_t udp_update_hdr4(const struct ctx *c, struct iphdr *iph,
+ size_t data_len, struct sockaddr_in *s_in,
+ in_port_t dstport, const struct timespec *now)
{
struct udphdr *uh = (struct udphdr *)(iph + 1);
in_port_t src_port;
@@ -602,6 +600,7 @@ static size_t udp_update_hdr4(const struct ctx *c, struct iphdr *iph,
uh->source = s_in->sin_port;
uh->dest = htons(dstport);
uh->len= htons(data_len + sizeof(struct udphdr));
+ uh->check = 0;
return ip_len;
}
@@ -615,9 +614,9 @@ static size_t udp_update_hdr4(const struct ctx *c, struct iphdr *iph,
*
* Return: size of tap frame with headers
*/
-static size_t udp_update_hdr6(const struct ctx *c, struct ipv6hdr *ip6h,
- size_t data_len, struct sockaddr_in6 *s_in6,
- in_port_t dstport, const struct timespec *now)
+size_t udp_update_hdr6(const struct ctx *c, struct ipv6hdr *ip6h,
+ size_t data_len, struct sockaddr_in6 *s_in6,
+ in_port_t dstport, const struct timespec *now)
{
struct udphdr *uh = (struct udphdr *)(ip6h + 1);
struct in6_addr *src;
@@ -672,7 +671,7 @@ static size_t udp_update_hdr6(const struct ctx *c, struct ipv6hdr *ip6h,
uh->dest = htons(dstport);
uh->len = ip6h->payload_len;
uh->check = 0;
- if (c->mode != MODE_VU || *c->pcap)
+ if (c->mode != MODE_VU)
uh->check = csum(uh, ntohs(ip6h->payload_len),
proto_ipv6_header_checksum(ip6h, IPPROTO_UDP));
ip6h->version = 6;
diff --git a/udp_internal.h b/udp_internal.h
new file mode 100644
index 000000000000..a09f3c69da42
--- /dev/null
+++ b/udp_internal.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later
+ * Copyright (c) 2021 Red Hat GmbH
+ * Author: Stefano Brivio <sbrivio@redhat.com>
+ */
+
+/* Declarations shared between udp.c and udp_vu.c: both include this header. */
+
+#ifndef UDP_INTERNAL_H
+#define UDP_INTERNAL_H
+
+#define UDP_CONN_TIMEOUT 180 /* s, timeout for ephemeral or local bind */
+#define UDP_MAX_FRAMES 32 /* max # of frames to receive at once */
+
+/* Defined in udp.c, where they are initialised with the loopback address of
+ * the respective family. udp_vu.c sets their port and uses them as the
+ * msg_name buffer for recvmsg().
+ */
+extern struct sockaddr_in udp4_localname;
+extern struct sockaddr_in6 udp6_localname;
+
+/* Header update helpers defined in udp.c.
+ * Return: size of tap frame with headers
+ */
+size_t udp_update_hdr4(const struct ctx *c, struct iphdr *iph,
+ size_t data_len, struct sockaddr_in *s_in,
+ in_port_t dstport, const struct timespec *now);
+size_t udp_update_hdr6(const struct ctx *c, struct ipv6hdr *ip6h,
+ size_t data_len, struct sockaddr_in6 *s_in6,
+ in_port_t dstport, const struct timespec *now);
+#endif /* UDP_INTERNAL_H */
diff --git a/udp_vu.c b/udp_vu.c
new file mode 100644
index 000000000000..c0f4cb90abd2
--- /dev/null
+++ b/udp_vu.c
@@ -0,0 +1,215 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include <unistd.h>
+#include <net/ethernet.h>
+#include <net/if.h>
+#include <netinet/in.h>
+#include <netinet/ip.h>
+#include <netinet/udp.h>
+#include <stdint.h>
+#include <stddef.h>
+#include <sys/uio.h>
+#include <linux/virtio_net.h>
+
+#include "checksum.h"
+#include "util.h"
+#include "ip.h"
+#include "passt.h"
+#include "pcap.h"
+#include "log.h"
+#include "vhost_user.h"
+#include "udp_internal.h"
+#include "udp_vu.h"
+
+/* vhost-user */
+
+/* Template copied into the virtio-net header of every frame built by
+ * udp_vu_sock_handler(): data marked valid, no GSO.
+ */
+static const struct virtio_net_hdr vu_header = {
+ .flags = VIRTIO_NET_HDR_F_DATA_VALID,
+ .gso_type = VIRTIO_NET_HDR_GSO_NONE,
+};
+
+/* Storage handed to vu_queue_pop() for the element being popped; it is
+ * reused for every pop, so anything needed from an element is copied out
+ * (into iov_vu[] and indexes[]) before the next pop.
+ */
+static unsigned char buffer[65536];
+/* One entry per guest buffer popped for the current datagram, passed to
+ * recvmsg() as the scatter list.
+ */
+static struct iovec iov_vu [VIRTQUEUE_MAX_SIZE];
+/* elem->index of each popped element, parallel to iov_vu[] */
+static unsigned int indexes [VIRTQUEUE_MAX_SIZE];
+
+/**
+ * udp_vu_sock_handler() - Receive datagrams from a UDP socket directly into
+ *			    guest RX virtqueue buffers (vhost-user mode)
+ * @c:		Execution context
+ * @ref:	epoll reference: ref.fd is the socket, ref.udp.port the
+ *		destination port, ref.udp.v6 selects IPv6 over IPv4
+ * @events:	epoll events bitmap, only EPOLLIN is acted upon
+ * @now:	Current timestamp, handed to udp_update_hdr4()/udp_update_hdr6()
+ *
+ * Up to UDP_MAX_FRAMES datagrams are processed per call. For each one, guest
+ * buffers are popped from the RX queue, room for the virtio-net, Ethernet, IP
+ * and UDP headers is reserved at the start of the first buffer, and recvmsg()
+ * stores the payload right after it. Buffers recvmsg() did not need are
+ * handed to vu_queue_rewind(). Without VIRTIO_NET_F_MRG_RXBUF a single buffer
+ * is used per datagram.
+ */
+void udp_vu_sock_handler(const struct ctx *c, union epoll_ref ref, uint32_t events,
+			 const struct timespec *now)
+{
+	VuDev *vdev = (VuDev *)&c->vdev;
+	VuVirtq *vq = &vdev->vq[VHOST_USER_RX_QUEUE];
+	size_t l2_hdrlen, vnet_hdrlen, fillsize;
+	ssize_t data_len;
+	in_port_t dstport = ref.udp.port;
+	bool has_mrg_rxbuf, v6 = ref.udp.v6;
+	struct msghdr msg;
+	int i, j, iov_count, iov_used, virtqueue_max;
+
+	if (c->no_udp || !(events & EPOLLIN))
+		return;
+
+	has_mrg_rxbuf = vu_has_feature(vdev, VIRTIO_NET_F_MRG_RXBUF);
+	if (has_mrg_rxbuf) {
+		vnet_hdrlen = sizeof(struct virtio_net_hdr_mrg_rxbuf);
+		virtqueue_max = VIRTQUEUE_MAX_SIZE;
+	} else {
+		vnet_hdrlen = sizeof(struct virtio_net_hdr);
+		virtqueue_max = 1;
+	}
+	l2_hdrlen = vnet_hdrlen + sizeof(struct ethhdr) + sizeof(struct udphdr);
+
+	if (v6) {
+		l2_hdrlen += sizeof(struct ipv6hdr);
+
+		udp6_localname.sin6_port = htons(dstport);
+		msg.msg_name = &udp6_localname;
+	} else {
+		l2_hdrlen += sizeof(struct iphdr);
+
+		udp4_localname.sin_port = htons(dstport);
+		msg.msg_name = &udp4_localname;
+	}
+
+	msg.msg_control = NULL;
+	msg.msg_controllen = 0;
+	msg.msg_flags = 0;
+
+	for (i = 0; i < UDP_MAX_FRAMES; i++) {
+		struct virtio_net_hdr_mrg_rxbuf *vh;
+		struct ethhdr *eh;
+		char *base;
+		size_t size;
+
+		/* Collect guest buffers until we can hold the largest
+		 * possible datagram, or we hit the per-frame buffer limit
+		 */
+		fillsize = USHRT_MAX;
+		iov_count = 0;
+		while (fillsize && iov_count < virtqueue_max) {
+			VuVirtqElement *elem;
+
+			elem = vu_queue_pop(vdev, vq, sizeof(VuVirtqElement), buffer);
+			if (!elem)
+				break;
+
+			if (elem->in_num < 1) {
+				err("virtio-net receive queue contains no in buffers");
+				/* NOTE(review): the element just popped is not
+				 * counted in iov_count -- confirm whether it
+				 * should be rewound as well
+				 */
+				vu_queue_rewind(vdev, vq, iov_count);
+				return;
+			}
+			ASSERT(elem->in_num == 1);
+			ASSERT(elem->in_sg[0].iov_len >= l2_hdrlen);
+
+			indexes[iov_count] = elem->index;
+			if (iov_count == 0) {
+				/* leave room for the headers in the first buffer */
+				iov_vu[0].iov_base = (char *)elem->in_sg[0].iov_base + l2_hdrlen;
+				iov_vu[0].iov_len = elem->in_sg[0].iov_len - l2_hdrlen;
+			} else {
+				iov_vu[iov_count].iov_base = elem->in_sg[0].iov_base;
+				iov_vu[iov_count].iov_len = elem->in_sg[0].iov_len;
+			}
+
+			if (iov_vu[iov_count].iov_len > fillsize)
+				iov_vu[iov_count].iov_len = fillsize;
+
+			fillsize -= iov_vu[iov_count].iov_len;
+
+			iov_count++;
+		}
+		if (iov_count == 0)
+			break;
+
+		msg.msg_iov = iov_vu;
+		msg.msg_iovlen = iov_count;
+		/* msg_namelen is value-result: recvmsg() overwrites it, so it
+		 * has to be set again before every call
+		 */
+		msg.msg_namelen = v6 ? sizeof(udp6_localname) :
+				       sizeof(udp4_localname);
+
+		data_len = recvmsg(ref.fd, &msg, 0);
+		if (data_len < 0) {
+			vu_queue_rewind(vdev, vq, iov_count);
+			return;
+		}
+
+		/* Trim the iovec array to the received length. The first
+		 * buffer is always used, even for a zero-length datagram,
+		 * because it carries the headers.
+		 */
+		iov_used = 0;
+		size = data_len;
+		do {
+			if (iov_vu[iov_used].iov_len > size)
+				iov_vu[iov_used].iov_len = size;
+
+			size -= iov_vu[iov_used].iov_len;
+			iov_used++;
+		} while (size);
+
+		base = (char *)iov_vu[0].iov_base - l2_hdrlen;
+		size = iov_vu[0].iov_len + l2_hdrlen;
+
+		/* release unused buffers */
+		vu_queue_rewind(vdev, vq, iov_count - iov_used);
+
+		/* vnet_header */
+		vh = (struct virtio_net_hdr_mrg_rxbuf *)base;
+		vh->hdr = vu_header;
+		if (has_mrg_rxbuf)
+			vh->num_buffers = htole16(iov_used);
+
+		/* ethernet header */
+		eh = (struct ethhdr *)(base + vnet_hdrlen);
+
+		memcpy(eh->h_dest, c->mac_guest, sizeof(eh->h_dest));
+		memcpy(eh->h_source, c->mac, sizeof(eh->h_source));
+
+		/* initialize header */
+		if (v6) {
+			struct ipv6hdr *ip6h = (struct ipv6hdr *)(eh + 1);
+			struct udphdr *uh = (struct udphdr *)(ip6h + 1);
+			uint32_t sum;
+
+			eh->h_proto = htons(ETH_P_IPV6);
+
+			*ip6h = (struct ipv6hdr)L2_BUF_IP6_INIT(IPPROTO_UDP);
+
+			udp_update_hdr6(c, ip6h, data_len, &udp6_localname,
+					dstport, now);
+			if (*c->pcap) {
+				/* a real checksum is only computed when
+				 * frames are captured
+				 */
+				sum = proto_ipv6_header_checksum(ip6h, IPPROTO_UDP);
+
+				iov_vu[0].iov_base = uh;
+				iov_vu[0].iov_len = size - l2_hdrlen + sizeof(*uh);
+				uh->check = csum_iov(iov_vu, iov_used, sum);
+			} else {
+				/* 0 checksum is invalid with IPv6/UDP */
+				uh->check = 0xFFFF;
+			}
+		} else {
+			struct iphdr *iph = (struct iphdr *)(eh + 1);
+			struct udphdr *uh = (struct udphdr *)(iph + 1);
+			uint32_t sum;
+
+			eh->h_proto = htons(ETH_P_IP);
+
+			*iph = (struct iphdr)L2_BUF_IP4_INIT(IPPROTO_UDP);
+
+			udp_update_hdr4(c, iph, data_len, &udp4_localname,
+					dstport, now);
+			if (*c->pcap) {
+				sum = proto_ipv4_header_checksum(iph, IPPROTO_UDP);
+
+				iov_vu[0].iov_base = uh;
+				iov_vu[0].iov_len = size - l2_hdrlen + sizeof(*uh);
+				uh->check = csum_iov(iov_vu, iov_used, sum);
+			}
+		}
+
+		/* set iov for pcap logging */
+		iov_vu[0].iov_base = base + vnet_hdrlen;
+		iov_vu[0].iov_len = size - vnet_hdrlen;
+		pcap_iov(iov_vu, iov_used);
+
+		/* set iov_len for vu_queue_fill_by_index(); */
+		iov_vu[0].iov_base = base;
+		iov_vu[0].iov_len = size;
+
+		/* send packets: use a separate index, the outer counter i
+		 * must keep counting datagrams against UDP_MAX_FRAMES
+		 */
+		for (j = 0; j < iov_used; j++)
+			vu_queue_fill_by_index(vdev, vq, indexes[j],
+					       iov_vu[j].iov_len, j);
+
+		vu_queue_flush(vdev, vq, iov_used);
+		vu_queue_notify(vdev, vq);
+	}
+}
diff --git a/udp_vu.h b/udp_vu.h
new file mode 100644
index 000000000000..e01ce047ee0a
--- /dev/null
+++ b/udp_vu.h
@@ -0,0 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#ifndef UDP_VU_H
+#define UDP_VU_H
+
+/* UDP socket event handler for vhost-user mode, defined in udp_vu.c.
+ * passt.c calls it for EPOLL_TYPE_UDP events when c.mode is MODE_VU,
+ * in place of udp_buf_sock_handler().
+ */
+void udp_vu_sock_handler(const struct ctx *c, union epoll_ref ref,
+ uint32_t events, const struct timespec *now);
+#endif /* UDP_VU_H */
--
@@ -0,0 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#ifndef UDP_VU_H
+#define UDP_VU_H
+
+void udp_vu_sock_handler(const struct ctx *c, union epoll_ref ref,
+ uint32_t events, const struct timespec *now);
+#endif /* UDP_VU_H */
--
2.42.0
next prev parent reply other threads:[~2024-02-02 14:11 UTC|newest]
Thread overview: 83+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-02-02 14:11 [PATCH 00/24] Add vhost-user support to passt Laurent Vivier
2024-02-02 14:11 ` [PATCH 01/24] iov: add some functions to manage iovec Laurent Vivier
2024-02-05 5:57 ` David Gibson
2024-02-06 14:28 ` Laurent Vivier
2024-02-07 1:01 ` David Gibson
2024-02-07 10:00 ` Laurent Vivier
2024-02-06 16:10 ` Stefano Brivio
2024-02-07 14:02 ` Laurent Vivier
2024-02-07 14:57 ` Stefano Brivio
2024-02-02 14:11 ` [PATCH 02/24] pcap: add pcap_iov() Laurent Vivier
2024-02-05 6:25 ` David Gibson
2024-02-06 16:10 ` Stefano Brivio
2024-02-02 14:11 ` [PATCH 03/24] checksum: align buffers Laurent Vivier
2024-02-05 6:02 ` David Gibson
2024-02-07 9:01 ` Stefano Brivio
2024-02-02 14:11 ` [PATCH 04/24] checksum: add csum_iov() Laurent Vivier
2024-02-05 6:07 ` David Gibson
2024-02-07 9:02 ` Stefano Brivio
2024-02-02 14:11 ` [PATCH 05/24] util: move IP stuff from util.[ch] to ip.[ch] Laurent Vivier
2024-02-05 6:13 ` David Gibson
2024-02-07 9:03 ` Stefano Brivio
2024-02-08 0:04 ` David Gibson
2024-02-02 14:11 ` [PATCH 06/24] ip: move duplicate IPv4 checksum function to ip.h Laurent Vivier
2024-02-05 6:16 ` David Gibson
2024-02-07 10:40 ` Stefano Brivio
2024-02-07 23:43 ` David Gibson
2024-02-02 14:11 ` [PATCH 07/24] ip: introduce functions to compute the header part checksum for TCP/UDP Laurent Vivier
2024-02-05 6:20 ` David Gibson
2024-02-07 10:41 ` Stefano Brivio
2024-02-02 14:11 ` [PATCH 08/24] tcp: extract buffer management from tcp_send_flag() Laurent Vivier
2024-02-06 0:24 ` David Gibson
2024-02-08 16:57 ` Stefano Brivio
2024-02-02 14:11 ` [PATCH 09/24] tcp: extract buffer management from tcp_conn_tap_mss() Laurent Vivier
2024-02-06 0:47 ` David Gibson
2024-02-08 16:59 ` Stefano Brivio
2024-02-02 14:11 ` [PATCH 10/24] tcp: rename functions that manage buffers Laurent Vivier
2024-02-06 1:48 ` David Gibson
2024-02-08 17:10 ` Stefano Brivio
2024-02-02 14:11 ` [PATCH 11/24] tcp: move buffers management functions to their own file Laurent Vivier
2024-02-02 14:11 ` [PATCH 12/24] tap: make tap_update_mac() generic Laurent Vivier
2024-02-06 1:49 ` David Gibson
2024-02-08 17:10 ` Stefano Brivio
2024-02-09 5:02 ` David Gibson
2024-02-02 14:11 ` [PATCH 13/24] tap: export pool_flush()/tapX_handler()/packet_add() Laurent Vivier
2024-02-02 14:29 ` Laurent Vivier
2024-02-06 1:52 ` David Gibson
2024-02-11 23:15 ` Stefano Brivio
2024-02-12 2:22 ` David Gibson
2024-02-02 14:11 ` [PATCH 14/24] udp: move udpX_l2_buf_t and udpX_l2_mh_sock out of udp_update_hdrX() Laurent Vivier
2024-02-06 1:59 ` David Gibson
2024-02-11 23:16 ` Stefano Brivio
2024-02-02 14:11 ` [PATCH 15/24] udp: rename udp_sock_handler() to udp_buf_sock_handler() Laurent Vivier
2024-02-06 2:14 ` David Gibson
2024-02-11 23:17 ` Stefano Brivio
2024-02-02 14:11 ` [PATCH 16/24] packet: replace struct desc by struct iovec Laurent Vivier
2024-02-06 2:25 ` David Gibson
2024-02-11 23:18 ` Stefano Brivio
2024-02-02 14:11 ` [PATCH 17/24] vhost-user: compare mode MODE_PASTA and not MODE_PASST Laurent Vivier
2024-02-06 2:29 ` David Gibson
2024-02-02 14:11 ` [PATCH 18/24] vhost-user: introduce virtio API Laurent Vivier
2024-02-06 3:51 ` David Gibson
2024-02-11 23:18 ` Stefano Brivio
2024-02-12 2:26 ` David Gibson
2024-02-02 14:11 ` [PATCH 19/24] vhost-user: introduce vhost-user API Laurent Vivier
2024-02-07 2:13 ` David Gibson
2024-02-02 14:11 ` [PATCH 20/24] vhost-user: add vhost-user Laurent Vivier
2024-02-07 2:40 ` David Gibson
2024-02-11 23:19 ` Stefano Brivio
2024-02-12 2:47 ` David Gibson
2024-02-13 15:22 ` Stefano Brivio
2024-02-14 2:05 ` David Gibson
2024-02-11 23:19 ` Stefano Brivio
2024-02-12 2:49 ` David Gibson
2024-02-12 10:02 ` Laurent Vivier
2024-02-12 16:56 ` Stefano Brivio
2024-02-02 14:11 ` [PATCH 21/24] vhost-user: use guest buffer directly in vu_handle_tx() Laurent Vivier
2024-02-09 4:26 ` David Gibson
2024-02-02 14:11 ` [PATCH 22/24] tcp: vhost-user RX nocopy Laurent Vivier
2024-02-09 4:57 ` David Gibson
2024-02-02 14:11 ` Laurent Vivier [this message]
2024-02-09 5:00 ` [PATCH 23/24] udp: " David Gibson
2024-02-02 14:11 ` [PATCH 24/24] vhost-user: remove tap_send_frames_vu() Laurent Vivier
2024-02-09 5:01 ` David Gibson
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20240202141151.3762941-24-lvivier@redhat.com \
--to=lvivier@redhat.com \
--cc=passt-dev@passt.top \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
Code repositories for project(s) associated with this public inbox
https://passt.top/passt
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for IMAP folder(s).