From mboxrd@z Thu Jan 1 00:00:00 1970 Received: from us-smtp-delivery-124.mimecast.com (us-smtp-delivery-124.mimecast.com [170.10.129.124]) by passt.top (Postfix) with ESMTP id 8F4765A0282 for ; Fri, 2 Feb 2024 15:11:58 +0100 (CET) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=redhat.com; s=mimecast20190719; t=1706883117; h=from:from:reply-to:subject:subject:date:date:message-id:message-id: to:to:cc:cc:mime-version:mime-version:content-type:content-type: content-transfer-encoding:content-transfer-encoding: in-reply-to:in-reply-to:references:references; bh=KmcxL/Y9RCVgp22FtPPfxUkSthjX+rHmPGq9vV/uYSM=; b=df4QnvNeHbRx1JX0m9gbwq7gXYq6d8Xiq9aJxk2PcgF9FiU19E62Ez9N7FCCMwVnjDV44A a1rC7YHwbGOTT3CuesZ8nYjATehrAiNt8vrJOKWqRF6rDmu3fHfKmpiOA4XCpU/rlCZ6Yt UPCNkpkJF2rkOmj2QTdEnlNRzhqrEBE= Received: from mimecast-mx02.redhat.com (mx-ext.redhat.com [66.187.233.73]) by relay.mimecast.com with ESMTP with STARTTLS (version=TLSv1.3, cipher=TLS_AES_256_GCM_SHA384) id us-mta-225-d015A20GMtGLqCKzxTMwkw-1; Fri, 02 Feb 2024 09:11:56 -0500 X-MC-Unique: d015A20GMtGLqCKzxTMwkw-1 Received: from smtp.corp.redhat.com (int-mx01.intmail.prod.int.rdu2.redhat.com [10.11.54.1]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature RSA-PSS (2048 bits) server-digest SHA256) (No client certificate requested) by mimecast-mx02.redhat.com (Postfix) with ESMTPS id F18321C0BA53 for ; Fri, 2 Feb 2024 14:11:55 +0000 (UTC) Received: from virtlab218.virt.lab.eng.bos.redhat.com (virtlab218.virt.lab.eng.bos.redhat.com [10.19.152.190]) by smtp.corp.redhat.com (Postfix) with ESMTP id DA2E43C2E; Fri, 2 Feb 2024 14:11:55 +0000 (UTC) From: Laurent Vivier To: passt-dev@passt.top Subject: [PATCH 23/24] udp: vhost-user RX nocopy Date: Fri, 2 Feb 2024 15:11:50 +0100 Message-ID: <20240202141151.3762941-24-lvivier@redhat.com> In-Reply-To: <20240202141151.3762941-1-lvivier@redhat.com> References: <20240202141151.3762941-1-lvivier@redhat.com> MIME-Version: 1.0 
X-Scanned-By: MIMEDefang 3.4.1 on 10.11.54.1 X-Mimecast-Spam-Score: 0 X-Mimecast-Originator: redhat.com Content-Transfer-Encoding: 8bit Content-Type: text/plain; charset="US-ASCII"; x-default=true Message-ID-Hash: 5ZFO4MWAUWLK2YF5VBDEQY47Y7T7AZAS X-Message-ID-Hash: 5ZFO4MWAUWLK2YF5VBDEQY47Y7T7AZAS X-MailFrom: lvivier@redhat.com X-Mailman-Rule-Misses: dmarc-mitigation; no-senders; approved; emergency; loop; banned-address; member-moderation; nonmember-moderation; administrivia; implicit-dest; max-recipients; max-size; news-moderation; no-subject; digests; suspicious-header CC: Laurent Vivier X-Mailman-Version: 3.3.8 Precedence: list List-Id: Development discussion and patches for passt Archived-At: Archived-At: List-Archive: List-Archive: List-Help: List-Owner: List-Post: List-Subscribe: List-Unsubscribe: Signed-off-by: Laurent Vivier --- Makefile | 4 +- passt.c | 5 +- passt.h | 1 + udp.c | 23 +++--- udp_internal.h | 21 +++++ udp_vu.c | 215 +++++++++++++++++++++++++++++++++++++++++++++++++ udp_vu.h | 8 ++ 7 files changed, 262 insertions(+), 15 deletions(-) create mode 100644 udp_internal.h create mode 100644 udp_vu.c create mode 100644 udp_vu.h diff --git a/Makefile b/Makefile index f7a403d19b61..1d2b5dbfe085 100644 --- a/Makefile +++ b/Makefile @@ -47,7 +47,7 @@ FLAGS += -DDUAL_STACK_SOCKETS=$(DUAL_STACK_SOCKETS) PASST_SRCS = arch.c arp.c checksum.c conf.c dhcp.c dhcpv6.c flow.c icmp.c \ igmp.c isolation.c lineread.c log.c mld.c ndp.c netlink.c packet.c \ passt.c pasta.c pcap.c pif.c port_fwd.c tap.c tcp.c tcp_splice.c \ - tcp_buf.c tcp_vu.c udp.c util.c iov.c ip.c virtio.c vhost_user.c + tcp_buf.c tcp_vu.c udp.c udp_vu.c util.c iov.c ip.c virtio.c vhost_user.c QRAP_SRCS = qrap.c SRCS = $(PASST_SRCS) $(QRAP_SRCS) @@ -57,7 +57,7 @@ PASST_HEADERS = arch.h arp.h checksum.h conf.h dhcp.h dhcpv6.h flow.h \ flow_table.h icmp.h inany.h isolation.h lineread.h log.h ndp.h \ netlink.h packet.h passt.h pasta.h pcap.h pif.h port_fwd.h siphash.h \ tap.h tcp.h tcp_conn.h 
tcp_splice.h tcp_buf.h tcp_vu.h tcp_internal.h \ - udp.h util.h iov.h ip.h virtio.h vhost_user.h + udp.h udp_internal.h udp_vu.h util.h iov.h ip.h virtio.h vhost_user.h HEADERS = $(PASST_HEADERS) seccomp.h C := \#include \nstruct tcp_info x = { .tcpi_snd_wnd = 0 }; diff --git a/passt.c b/passt.c index 952aded12848..a5abd5c4fc03 100644 --- a/passt.c +++ b/passt.c @@ -392,7 +392,10 @@ loop: tcp_timer_handler(&c, ref); break; case EPOLL_TYPE_UDP: - udp_buf_sock_handler(&c, ref, eventmask, &now); + if (c.mode == MODE_VU) + udp_vu_sock_handler(&c, ref, eventmask, &now); + else + udp_buf_sock_handler(&c, ref, eventmask, &now); break; case EPOLL_TYPE_ICMP: icmp_sock_handler(&c, AF_INET, ref); diff --git a/passt.h b/passt.h index 4e0100d51a4d..04f4af8fd72e 100644 --- a/passt.h +++ b/passt.h @@ -42,6 +42,7 @@ union epoll_ref; #include "port_fwd.h" #include "tcp.h" #include "udp.h" +#include "udp_vu.h" #include "vhost_user.h" /** diff --git a/udp.c b/udp.c index 799a10989a91..da67d0cfa46b 100644 --- a/udp.c +++ b/udp.c @@ -117,9 +117,7 @@ #include "tap.h" #include "pcap.h" #include "log.h" - -#define UDP_CONN_TIMEOUT 180 /* s, timeout for ephemeral or local bind */ -#define UDP_MAX_FRAMES 32 /* max # of frames to receive at once */ +#include "udp_internal.h" /** * struct udp_tap_port - Port tracking based on tap-facing source port @@ -227,11 +225,11 @@ static struct mmsghdr udp6_l2_mh_sock [UDP_MAX_FRAMES]; static struct iovec udp4_iov_splice [UDP_MAX_FRAMES]; static struct iovec udp6_iov_splice [UDP_MAX_FRAMES]; -static struct sockaddr_in udp4_localname = { +struct sockaddr_in udp4_localname = { .sin_family = AF_INET, .sin_addr = IN4ADDR_LOOPBACK_INIT, }; -static struct sockaddr_in6 udp6_localname = { +struct sockaddr_in6 udp6_localname = { .sin6_family = AF_INET6, .sin6_addr = IN6ADDR_LOOPBACK_INIT, }; @@ -562,9 +560,9 @@ static void udp_splice_sendfrom(const struct ctx *c, unsigned start, unsigned n, * * Return: size of tap frame with headers */ -static size_t 
udp_update_hdr4(const struct ctx *c, struct iphdr *iph, - size_t data_len, struct sockaddr_in *s_in, - in_port_t dstport, const struct timespec *now) +size_t udp_update_hdr4(const struct ctx *c, struct iphdr *iph, + size_t data_len, struct sockaddr_in *s_in, + in_port_t dstport, const struct timespec *now) { struct udphdr *uh = (struct udphdr *)(iph + 1); in_port_t src_port; @@ -602,6 +600,7 @@ static size_t udp_update_hdr4(const struct ctx *c, struct iphdr *iph, uh->source = s_in->sin_port; uh->dest = htons(dstport); uh->len= htons(data_len + sizeof(struct udphdr)); + uh->check = 0; return ip_len; } @@ -615,9 +614,9 @@ static size_t udp_update_hdr4(const struct ctx *c, struct iphdr *iph, * * Return: size of tap frame with headers */ -static size_t udp_update_hdr6(const struct ctx *c, struct ipv6hdr *ip6h, - size_t data_len, struct sockaddr_in6 *s_in6, - in_port_t dstport, const struct timespec *now) +size_t udp_update_hdr6(const struct ctx *c, struct ipv6hdr *ip6h, + size_t data_len, struct sockaddr_in6 *s_in6, + in_port_t dstport, const struct timespec *now) { struct udphdr *uh = (struct udphdr *)(ip6h + 1); struct in6_addr *src; @@ -672,7 +671,7 @@ static size_t udp_update_hdr6(const struct ctx *c, struct ipv6hdr *ip6h, uh->dest = htons(dstport); uh->len = ip6h->payload_len; uh->check = 0; - if (c->mode != MODE_VU || *c->pcap) + if (c->mode != MODE_VU) uh->check = csum(uh, ntohs(ip6h->payload_len), proto_ipv6_header_checksum(ip6h, IPPROTO_UDP)); ip6h->version = 6; diff --git a/udp_internal.h b/udp_internal.h new file mode 100644 index 000000000000..a09f3c69da42 --- /dev/null +++ b/udp_internal.h @@ -0,0 +1,21 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later + * Copyright (c) 2021 Red Hat GmbH + * Author: Stefano Brivio + */ + +#ifndef UDP_INTERNAL_H +#define UDP_INTERNAL_H + +#define UDP_CONN_TIMEOUT 180 /* s, timeout for ephemeral or local bind */ +#define UDP_MAX_FRAMES 32 /* max # of frames to receive at once */ + +extern struct sockaddr_in udp4_localname; 
+extern struct sockaddr_in6 udp6_localname;
+
+size_t udp_update_hdr4(const struct ctx *c, struct iphdr *iph,
+		       size_t data_len, struct sockaddr_in *s_in,
+		       in_port_t dstport, const struct timespec *now);
+size_t udp_update_hdr6(const struct ctx *c, struct ipv6hdr *ip6h,
+		       size_t data_len, struct sockaddr_in6 *s_in6,
+		       in_port_t dstport, const struct timespec *now);
+#endif /* UDP_INTERNAL_H */
diff --git a/udp_vu.c b/udp_vu.c
new file mode 100644
index 000000000000..c0f4cb90abd2
--- /dev/null
+++ b/udp_vu.c
@@ -0,0 +1,236 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include <unistd.h>
+#include <net/ethernet.h>
+#include <net/if.h>
+#include <netinet/in.h>
+#include <netinet/ip.h>
+#include <netinet/udp.h>
+#include <stdint.h>
+#include <stddef.h>
+#include <sys/uio.h>
+#include <linux/virtio_net.h>
+
+#include "checksum.h"
+#include "util.h"
+#include "ip.h"
+#include "passt.h"
+#include "pcap.h"
+#include "log.h"
+#include "vhost_user.h"
+#include "udp_internal.h"
+#include "udp_vu.h"
+
+/* vhost-user */
+static const struct virtio_net_hdr vu_header = {
+	.flags = VIRTIO_NET_HDR_F_DATA_VALID,
+	.gso_type = VIRTIO_NET_HDR_GSO_NONE,
+};
+
+static unsigned char buffer[65536];
+static struct iovec     iov_vu		[VIRTQUEUE_MAX_SIZE];
+static unsigned int     indexes		[VIRTQUEUE_MAX_SIZE];
+
+/**
+ * udp_vu_sock_handler() - Receive socket datagrams into vhost-user RX buffers
+ * @c:		Execution context
+ * @ref:	epoll reference
+ * @events:	epoll events bitmap
+ * @now:	Current timestamp
+ *
+ * Datagrams are read with recvmsg() directly into buffers popped from the RX
+ * virtqueue, leaving room at the start of the first buffer for the virtio-net,
+ * Ethernet, IP and UDP headers, which are then filled in place. At most
+ * UDP_MAX_FRAMES datagrams are handled per call.
+ */
+void udp_vu_sock_handler(const struct ctx *c, union epoll_ref ref,
+			 uint32_t events, const struct timespec *now)
+{
+	VuDev *vdev = (VuDev *)&c->vdev;
+	VuVirtq *vq = &vdev->vq[VHOST_USER_RX_QUEUE];
+	size_t l2_hdrlen, vnet_hdrlen, fillsize;
+	ssize_t data_len;
+	in_port_t dstport = ref.udp.port;
+	bool has_mrg_rxbuf, v6 = ref.udp.v6;
+	int i, j, iov_count, iov_used, virtqueue_max;
+	struct msghdr msg = { 0 };
+	socklen_t namelen;
+
+	if (c->no_udp || !(events & EPOLLIN))
+		return;
+
+	has_mrg_rxbuf = vu_has_feature(vdev, VIRTIO_NET_F_MRG_RXBUF);
+	if (has_mrg_rxbuf) {
+		vnet_hdrlen = sizeof(struct virtio_net_hdr_mrg_rxbuf);
+		virtqueue_max = VIRTQUEUE_MAX_SIZE;
+	} else {
+		/* without mergeable RX buffers, use one buffer per datagram */
+		vnet_hdrlen = sizeof(struct virtio_net_hdr);
+		virtqueue_max = 1;
+	}
+	l2_hdrlen = vnet_hdrlen + sizeof(struct ethhdr) + sizeof(struct udphdr);
+
+	if (v6) {
+		l2_hdrlen += sizeof(struct ipv6hdr);
+
+		udp6_localname.sin6_port = htons(dstport);
+		msg.msg_name = &udp6_localname;
+		namelen = sizeof(udp6_localname);
+	} else {
+		l2_hdrlen += sizeof(struct iphdr);
+
+		udp4_localname.sin_port = htons(dstport);
+		msg.msg_name = &udp4_localname;
+		namelen = sizeof(udp4_localname);
+	}
+
+	for (i = 0; i < UDP_MAX_FRAMES; i++) {
+		struct virtio_net_hdr_mrg_rxbuf *vh;
+		struct ethhdr *eh;
+		char *base;
+		size_t size;
+
+		/* collect buffers for one datagram, up to USHRT_MAX bytes */
+		fillsize = USHRT_MAX;
+		iov_count = 0;
+		while (fillsize && iov_count < virtqueue_max) {
+			VuVirtqElement *elem;
+
+			elem = vu_queue_pop(vdev, vq, sizeof(VuVirtqElement),
+					    buffer);
+			if (!elem)
+				break;
+
+			if (elem->in_num < 1) {
+				err("virtio-net receive queue contains no in buffers");
+				vu_queue_rewind(vdev, vq, iov_count);
+				return;
+			}
+			ASSERT(elem->in_num == 1);
+			ASSERT(elem->in_sg[0].iov_len >= l2_hdrlen);
+
+			indexes[iov_count] = elem->index;
+			if (iov_count == 0) {
+				/* reserve room for the headers */
+				iov_vu[0].iov_base =
+					(char *)elem->in_sg[0].iov_base + l2_hdrlen;
+				iov_vu[0].iov_len =
+					elem->in_sg[0].iov_len - l2_hdrlen;
+			} else {
+				iov_vu[iov_count].iov_base = elem->in_sg[0].iov_base;
+				iov_vu[iov_count].iov_len = elem->in_sg[0].iov_len;
+			}
+
+			if (iov_vu[iov_count].iov_len > fillsize)
+				iov_vu[iov_count].iov_len = fillsize;
+
+			fillsize -= iov_vu[iov_count].iov_len;
+
+			iov_count++;
+		}
+		if (iov_count == 0)
+			break;
+
+		msg.msg_iov = iov_vu;
+		msg.msg_iovlen = iov_count;
+		/* value-result field: recvmsg() overwrites it on return */
+		msg.msg_namelen = namelen;
+
+		data_len = recvmsg(ref.fd, &msg, 0);
+		if (data_len < 0) {
+			vu_queue_rewind(vdev, vq, iov_count);
+			return;
+		}
+
+		/* Count the buffers recvmsg() filled and trim the last one.
+		 * The first buffer is always used as it carries the headers,
+		 * even if the datagram has no payload.
+		 */
+		iov_used = 0;
+		size = data_len;
+		do {
+			if (iov_vu[iov_used].iov_len > size)
+				iov_vu[iov_used].iov_len = size;
+
+			size -= iov_vu[iov_used].iov_len;
+			iov_used++;
+		} while (size && iov_used < iov_count);
+
+		base = (char *)iov_vu[0].iov_base - l2_hdrlen;
+		size = iov_vu[0].iov_len + l2_hdrlen;
+
+		/* release unused buffers */
+		vu_queue_rewind(vdev, vq, iov_count - iov_used);
+
+		/* vnet_header */
+		vh = (struct virtio_net_hdr_mrg_rxbuf *)base;
+		vh->hdr = vu_header;
+		if (has_mrg_rxbuf)
+			vh->num_buffers = htole16(iov_used);
+
+		/* ethernet header */
+		eh = (struct ethhdr *)(base + vnet_hdrlen);
+
+		memcpy(eh->h_dest, c->mac_guest, sizeof(eh->h_dest));
+		memcpy(eh->h_source, c->mac, sizeof(eh->h_source));
+
+		/* initialize header */
+		if (v6) {
+			struct ipv6hdr *ip6h = (struct ipv6hdr *)(eh + 1);
+			struct udphdr *uh = (struct udphdr *)(ip6h + 1);
+			uint32_t sum;
+
+			eh->h_proto = htons(ETH_P_IPV6);
+
+			*ip6h = (struct ipv6hdr)L2_BUF_IP6_INIT(IPPROTO_UDP);
+
+			udp_update_hdr6(c, ip6h, data_len, &udp6_localname,
+					dstport, now);
+			if (*c->pcap) {
+				sum = proto_ipv6_header_checksum(ip6h, IPPROTO_UDP);
+
+				iov_vu[0].iov_base = uh;
+				iov_vu[0].iov_len = size - l2_hdrlen + sizeof(*uh);
+				uh->check = csum_iov(iov_vu, iov_used, sum);
+			} else {
+				/* 0 checksum is invalid with IPv6/UDP */
+				uh->check = 0xFFFF;
+			}
+		} else {
+			struct iphdr *iph = (struct iphdr *)(eh + 1);
+			struct udphdr *uh = (struct udphdr *)(iph + 1);
+			uint32_t sum;
+
+			eh->h_proto = htons(ETH_P_IP);
+
+			*iph = (struct iphdr)L2_BUF_IP4_INIT(IPPROTO_UDP);
+
+			udp_update_hdr4(c, iph, data_len, &udp4_localname,
+					dstport, now);
+			if (*c->pcap) {
+				sum = proto_ipv4_header_checksum(iph, IPPROTO_UDP);
+
+				iov_vu[0].iov_base = uh;
+				iov_vu[0].iov_len = size - l2_hdrlen + sizeof(*uh);
+				uh->check = csum_iov(iov_vu, iov_used, sum);
+			}
+		}
+
+		/* set iov for pcap logging */
+		iov_vu[0].iov_base = base + vnet_hdrlen;
+		iov_vu[0].iov_len = size - vnet_hdrlen;
+		pcap_iov(iov_vu, iov_used);
+
+		/* set iov_len for vu_queue_fill_by_index(); */
+		iov_vu[0].iov_base = base;
+		iov_vu[0].iov_len = size;
+
+		/* send packets; a separate index keeps the frame count in i */
+		for (j = 0; j < iov_used; j++)
+			vu_queue_fill_by_index(vdev, vq, indexes[j],
+					       iov_vu[j].iov_len, j);
+
+		vu_queue_flush(vdev, vq, iov_used);
+		vu_queue_notify(vdev, vq);
+	}
+}
diff --git a/udp_vu.h b/udp_vu.h
new file mode 100644
index 000000000000..e01ce047ee0a
--- /dev/null
+++ 
b/udp_vu.h @@ -0,0 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0-or-later + +#ifndef UDP_VU_H +#define UDP_VU_H + +void udp_vu_sock_handler(const struct ctx *c, union epoll_ref ref, + uint32_t events, const struct timespec *now); +#endif /* UDP_VU_H */ -- 2.42.0