// SPDX-License-Identifier: GPL-2.0-or-later /* udp_vu.c - UDP L2 vhost-user management functions * * Copyright Red Hat * Author: Laurent Vivier */ #include #include #include #include #include #include #include #include #include #include #include #include "checksum.h" #include "util.h" #include "ip.h" #include "siphash.h" #include "inany.h" #include "passt.h" #include "pcap.h" #include "log.h" #include "vhost_user.h" #include "udp_internal.h" #include "flow.h" #include "flow_table.h" #include "udp_flow.h" #include "udp_vu.h" #include "vu_common.h" static struct iovec iov_vu [VIRTQUEUE_MAX_SIZE]; static struct vu_virtq_element elem [VIRTQUEUE_MAX_SIZE]; static struct iovec in_sg[VIRTQUEUE_MAX_SIZE]; static int in_sg_count; /** * udp_vu_l2_hdrlen() - return the size of the header in level 2 frame (UDP) * @v6: Set for IPv6 packet * * Return: Return the size of the header */ static size_t udp_vu_l2_hdrlen(bool v6) { size_t l2_hdrlen; l2_hdrlen = sizeof(struct virtio_net_hdr_mrg_rxbuf) + sizeof(struct ethhdr) + sizeof(struct udphdr); if (v6) l2_hdrlen += sizeof(struct ipv6hdr); else l2_hdrlen += sizeof(struct iphdr); return l2_hdrlen; } static int udp_vu_sock_init(int s, union sockaddr_inany *s_in) { struct msghdr msg = { .msg_name = s_in, .msg_namelen = sizeof(union sockaddr_inany), }; return recvmsg(s, &msg, MSG_PEEK | MSG_DONTWAIT); } /** * udp_vu_sock_recv() - Receive datagrams from socket into vhost-user buffers * @c: Execution context * @s: Socket to receive from * @events: epoll events bitmap * @v6: Set for IPv6 connections * @datalen: Size of received data (output) * * Return: Number of iov entries used to store the datagram */ static int udp_vu_sock_recv(const struct ctx *c, int s, uint32_t events, bool v6, ssize_t *data_len) { struct vu_dev *vdev = c->vdev; struct vu_virtq *vq = &vdev->vq[VHOST_USER_RX_QUEUE]; int virtqueue_max, iov_cnt, idx, iov_used; size_t fillsize, size, off, l2_hdrlen; struct virtio_net_hdr_mrg_rxbuf *vh; struct msghdr msg = { 0 }; char *base; ASSERT(!c->no_udp); if (!(events & EPOLLIN)) return 0; /* compute L2 header length */ if (vu_has_feature(vdev, VIRTIO_NET_F_MRG_RXBUF)) virtqueue_max = VIRTQUEUE_MAX_SIZE; else virtqueue_max = 1; l2_hdrlen = udp_vu_l2_hdrlen(v6); fillsize = USHRT_MAX; iov_cnt = 0; in_sg_count = 0; while (fillsize && iov_cnt < virtqueue_max && in_sg_count < ARRAY_SIZE(in_sg)) { int ret; elem[iov_cnt].out_num = 0; elem[iov_cnt].out_sg = NULL; elem[iov_cnt].in_num = ARRAY_SIZE(in_sg) - in_sg_count; elem[iov_cnt].in_sg = &in_sg[in_sg_count]; ret = vu_queue_pop(vdev, vq, &elem[iov_cnt]); if (ret < 0) break; in_sg_count += elem[iov_cnt].in_num; if (elem[iov_cnt].in_num < 1) { err("virtio-net receive queue contains no in buffers"); vu_queue_rewind(vq, iov_cnt); return 0; } ASSERT(elem[iov_cnt].in_num == 1); ASSERT(elem[iov_cnt].in_sg[0].iov_len >= l2_hdrlen); if (iov_cnt == 0) { base = elem[iov_cnt].in_sg[0].iov_base; size = elem[iov_cnt].in_sg[0].iov_len; /* keep space for the headers */ iov_vu[0].iov_base = base + l2_hdrlen; iov_vu[0].iov_len = size - l2_hdrlen; } else { iov_vu[iov_cnt].iov_base = elem[iov_cnt].in_sg[0].iov_base; iov_vu[iov_cnt].iov_len = elem[iov_cnt].in_sg[0].iov_len; } if (iov_vu[iov_cnt].iov_len > fillsize) iov_vu[iov_cnt].iov_len = fillsize; fillsize -= iov_vu[iov_cnt].iov_len; iov_cnt++; } if (iov_cnt == 0) return 0; msg.msg_iov = iov_vu; msg.msg_iovlen = iov_cnt; *data_len = recvmsg(s, &msg, 0); if (*data_len < 0) { vu_queue_rewind(vq, iov_cnt); return 0; } /* restore original values */ iov_vu[0].iov_base = base; iov_vu[0].iov_len = size; /* count the numbers of buffer filled by recvmsg() */ idx = iov_skip_bytes(iov_vu, iov_cnt, l2_hdrlen + *data_len, &off); /* adjust last iov length */ if (idx < iov_cnt) iov_vu[idx].iov_len = off; iov_used = idx + !!off; /* release unused buffers */ vu_queue_rewind(vq, iov_cnt - iov_used); vh = (struct virtio_net_hdr_mrg_rxbuf *)base; vh->hdr = VU_HEADER; if (vu_has_feature(vdev, VIRTIO_NET_F_MRG_RXBUF)) vh->num_buffers = htole16(iov_used); return iov_used; } /** * udp_vu_prepare() - Prepare the packet header * @c: Execution context * @toside: Address information for one side of the flow * @datalen: Packet data length * * Return:i Level-4 length */ static size_t udp_vu_prepare(const struct ctx *c, const struct flowside *toside, ssize_t data_len) { struct ethhdr *eh; size_t l4len; /* ethernet header */ eh = vu_eth(iov_vu[0].iov_base); memcpy(eh->h_dest, c->guest_mac, sizeof(eh->h_dest)); memcpy(eh->h_source, c->our_tap_mac, sizeof(eh->h_source)); /* initialize header */ if (inany_v4(&toside->eaddr) && inany_v4(&toside->oaddr)) { struct iphdr *iph = vu_ip(iov_vu[0].iov_base); struct udp_payload_t *bp = vu_payloadv4(iov_vu[0].iov_base); eh->h_proto = htons(ETH_P_IP); *iph = (struct iphdr)L2_BUF_IP4_INIT(IPPROTO_UDP); l4len = udp_update_hdr4(iph, bp, toside, data_len, true); } else { struct ipv6hdr *ip6h = vu_ip(iov_vu[0].iov_base); struct udp_payload_t *bp = vu_payloadv6(iov_vu[0].iov_base); eh->h_proto = htons(ETH_P_IPV6); *ip6h = (struct ipv6hdr)L2_BUF_IP6_INIT(IPPROTO_UDP); l4len = udp_update_hdr6(ip6h, bp, toside, data_len, true); } return l4len; } /** * udp_vu_pcap() - Capture a single frame to pcap file (UDP) * @c: Execution context * @toside: ddress information for one side of the flow * @l4len: IPv4 Payload length * @iov_used: Length of the array */ static void udp_vu_pcap(const struct ctx *c, const struct flowside *toside, size_t l4len, int iov_used) { const struct in_addr *src4 = inany_v4(&toside->oaddr); const struct in_addr *dst4 = inany_v4(&toside->eaddr); char *base = iov_vu[0].iov_base; size_t size = iov_vu[0].iov_len; struct udp_payload_t *bp; uint32_t sum; if (!*c->pcap) return; if (src4 && dst4) { bp = vu_payloadv4(base); sum = proto_ipv4_header_psum(l4len, IPPROTO_UDP, *src4, *dst4); } else { bp = vu_payloadv6(base); sum = proto_ipv6_header_psum(l4len, IPPROTO_UDP, &toside->oaddr.a6, &toside->eaddr.a6); bp->uh.check = 0; /* by default, set to 0xffff */ } iov_vu[0].iov_base = &bp->uh; iov_vu[0].iov_len = size - ((char *)iov_vu[0].iov_base - base); bp->uh.check = csum_iov(iov_vu, iov_used, sum); /* set iov for pcap logging */ iov_vu[0].iov_base = base + sizeof(struct virtio_net_hdr_mrg_rxbuf); iov_vu[0].iov_len = size - sizeof(struct virtio_net_hdr_mrg_rxbuf); pcap_iov(iov_vu, iov_used); /* restore iov_vu[0] */ iov_vu[0].iov_base = base; iov_vu[0].iov_len = size; } /** * udp_vu_listen_sock_handler() - Handle new data from socket * @c: Execution context * @ref: epoll reference * @events: epoll events bitmap * @now: Current timestamp */ void udp_vu_listen_sock_handler(const struct ctx *c, union epoll_ref ref, uint32_t events, const struct timespec *now) { struct vu_dev *vdev = c->vdev; struct vu_virtq *vq = &vdev->vq[VHOST_USER_RX_QUEUE]; const struct flowside *toside; union sockaddr_inany s_in; flow_sidx_t batchsidx; uint8_t batchpif; bool v6; int i; if (udp_sock_errs(c, ref.fd, events) < 0) { err("UDP: Unrecoverable error on listening socket:" " (%s port %hu)", pif_name(ref.udp.pif), ref.udp.port); return; } if (udp_vu_sock_init(ref.fd, &s_in) < 0) return; batchsidx = udp_flow_from_sock(c, ref, &s_in, now); batchpif = pif_at_sidx(batchsidx); if (batchpif != PIF_TAP) { if (flow_sidx_valid(batchsidx)) { flow_sidx_t fromsidx = flow_sidx_opposite(batchsidx); struct udp_flow *uflow = udp_at_sidx(batchsidx); flow_err(uflow, "No support for forwarding UDP from %s to %s", pif_name(pif_at_sidx(fromsidx)), pif_name(batchpif)); } else { debug("Discarding 1 datagram without flow"); } return; } toside = flowside_at_sidx(batchsidx); v6 = !(inany_v4(&toside->eaddr) && inany_v4(&toside->oaddr)); for (i = 0; i < UDP_MAX_FRAMES; i++) { ssize_t data_len; size_t l4len; int iov_used; iov_used = udp_vu_sock_recv(c, ref.fd, events, v6, &data_len); if (iov_used <= 0) return; l4len = udp_vu_prepare(c, toside, data_len); udp_vu_pcap(c, toside, l4len, iov_used); vu_send_frame(vdev, vq, elem, iov_vu, iov_used); } } /** * udp_vu_reply_sock_handler() - Handle new data from flow specific socket * @c: Execution context * @ref: epoll reference * @events: epoll events bitmap * @now: Current timestamp */ void udp_vu_reply_sock_handler(const struct ctx *c, union epoll_ref ref, uint32_t events, const struct timespec *now) { flow_sidx_t tosidx = flow_sidx_opposite(ref.flowside); const struct flowside *toside = flowside_at_sidx(tosidx); struct vu_dev *vdev = c->vdev; struct vu_virtq *vq = &vdev->vq[VHOST_USER_RX_QUEUE]; struct udp_flow *uflow = udp_at_sidx(ref.flowside); int from_s = uflow->s[ref.flowside.sidei]; uint8_t topif = pif_at_sidx(tosidx); bool v6; int i; ASSERT(!c->no_udp); ASSERT(uflow); if (udp_sock_errs(c, from_s, events) < 0) { flow_err(uflow, "Unrecoverable error on reply socket"); flow_err_details(uflow); udp_flow_close(c, uflow); return; } if (topif != PIF_TAP) { uint8_t frompif = pif_at_sidx(ref.flowside); flow_err(uflow, "No support for forwarding UDP from %s to %s", pif_name(frompif), pif_name(topif)); return; } v6 = !(inany_v4(&toside->eaddr) && inany_v4(&toside->oaddr)); for (i = 0; i < UDP_MAX_FRAMES; i++) { ssize_t data_len; size_t l4len; int iov_used; iov_used = udp_vu_sock_recv(c, from_s, events, v6, &data_len); if (iov_used <= 0) return; flow_trace(uflow, "Received 1 datagram on reply socket"); uflow->ts = now->tv_sec; l4len = udp_vu_prepare(c, toside, data_len); udp_vu_pcap(c, toside, l4len, iov_used); vu_send_frame(vdev, vq, elem, iov_vu, iov_used); } }