/* SPDX-License-Identifier: GPL-2.0-or-later * Copyright Red Hat * Author: Laurent Vivier * * udp_vu.c - UDP L2 vhost-user management functions */ #include #include #include #include #include #include #include #include #include #include #include #include "checksum.h" #include "util.h" #include "ip.h" #include "siphash.h" #include "inany.h" #include "passt.h" #include "pcap.h" #include "log.h" #include "vhost_user.h" #include "udp_internal.h" #include "flow.h" #include "flow_table.h" #include "udp_flow.h" #include "udp_vu.h" #include "vu_common.h" /* vhost-user */ static const struct virtio_net_hdr vu_header = { .flags = VIRTIO_NET_HDR_F_DATA_VALID, .gso_type = VIRTIO_NET_HDR_GSO_NONE, }; static struct iovec iov_vu [VIRTQUEUE_MAX_SIZE]; static struct vu_virtq_element elem [VIRTQUEUE_MAX_SIZE]; static struct iovec in_sg[VIRTQUEUE_MAX_SIZE]; static int in_sg_count; static size_t udp_vu_l2_hdrlen(const struct vu_dev *vdev, bool v6) { size_t l2_hdrlen; l2_hdrlen = vdev->hdrlen + sizeof(struct ethhdr) + sizeof(struct udphdr); if (v6) l2_hdrlen += sizeof(struct ipv6hdr); else l2_hdrlen += sizeof(struct iphdr); return l2_hdrlen; } static int udp_vu_sock_recv(const struct ctx *c, union sockaddr_inany *s_in, int s, uint32_t events, bool v6, ssize_t *data_len) { struct vu_dev *vdev = c->vdev; struct vu_virtq *vq = &vdev->vq[VHOST_USER_RX_QUEUE]; int virtqueue_max, iov_cnt, idx, iov_used; size_t fillsize, size, off, l2_hdrlen; struct virtio_net_hdr_mrg_rxbuf *vh; struct msghdr msg = { 0 }; char *base; ASSERT(!c->no_udp); /* Clear any errors first */ if (events & EPOLLERR) { while (udp_sock_recverr(s)) ; } if (!(events & EPOLLIN)) return 0; /* compute L2 header length */ if (vu_has_feature(vdev, VIRTIO_NET_F_MRG_RXBUF)) virtqueue_max = VIRTQUEUE_MAX_SIZE; else virtqueue_max = 1; l2_hdrlen = udp_vu_l2_hdrlen(vdev, v6); msg.msg_name = s_in; msg.msg_namelen = sizeof(union sockaddr_inany); fillsize = USHRT_MAX; iov_cnt = 0; in_sg_count = 0; while (fillsize && iov_cnt < virtqueue_max && in_sg_count < ARRAY_SIZE(in_sg)) { int ret; elem[iov_cnt].out_num = 0; elem[iov_cnt].out_sg = NULL; elem[iov_cnt].in_num = ARRAY_SIZE(in_sg) - in_sg_count; elem[iov_cnt].in_sg = &in_sg[in_sg_count]; ret = vu_queue_pop(vdev, vq, &elem[iov_cnt]); if (ret < 0) break; in_sg_count += elem[iov_cnt].in_num; if (elem[iov_cnt].in_num < 1) { err("virtio-net receive queue contains no in buffers"); vu_queue_rewind(vq, iov_cnt); return 0; } ASSERT(elem[iov_cnt].in_num == 1); ASSERT(elem[iov_cnt].in_sg[0].iov_len >= l2_hdrlen); if (iov_cnt == 0) { base = elem[iov_cnt].in_sg[0].iov_base; size = elem[iov_cnt].in_sg[0].iov_len; /* keep space for the headers */ iov_vu[0].iov_base = base + l2_hdrlen; iov_vu[0].iov_len = size - l2_hdrlen; } else { iov_vu[iov_cnt].iov_base = elem[iov_cnt].in_sg[0].iov_base; iov_vu[iov_cnt].iov_len = elem[iov_cnt].in_sg[0].iov_len; } if (iov_vu[iov_cnt].iov_len > fillsize) iov_vu[iov_cnt].iov_len = fillsize; fillsize -= iov_vu[iov_cnt].iov_len; iov_cnt++; } if (iov_cnt == 0) return 0; msg.msg_iov = iov_vu; msg.msg_iovlen = iov_cnt; *data_len = recvmsg(s, &msg, 0); if (*data_len < 0) { vu_queue_rewind(vq, iov_cnt); return 0; } /* restore original values */ iov_vu[0].iov_base = base; iov_vu[0].iov_len = size; /* count the numbers of buffer filled by recvmsg() */ idx = iov_skip_bytes(iov_vu, iov_cnt, l2_hdrlen + *data_len, &off); /* adjust last iov length */ if (idx < iov_cnt) iov_vu[idx].iov_len = off; iov_used = idx + !!off; /* release unused buffers */ vu_queue_rewind(vq, iov_cnt - iov_used); vh = (struct virtio_net_hdr_mrg_rxbuf *)base; vh->hdr = vu_header; if (vdev->hdrlen == sizeof(struct virtio_net_hdr_mrg_rxbuf)) vh->num_buffers = htole16(iov_used); return iov_used; } static size_t udp_vu_prepare(const struct ctx *c, const struct flowside *toside, ssize_t data_len) { const struct vu_dev *vdev = c->vdev; struct ethhdr *eh; size_t l4len; /* ethernet header */ eh = vu_eth(vdev, iov_vu[0].iov_base); memcpy(eh->h_dest, c->mac_guest, sizeof(eh->h_dest)); memcpy(eh->h_source, c->mac, sizeof(eh->h_source)); /* initialize header */ if (inany_v4(&toside->eaddr) && inany_v4(&toside->faddr)) { struct iphdr *iph = vu_ip(vdev, iov_vu[0].iov_base); struct udp_payload_t *bp = vu_payloadv4(vdev, iov_vu[0].iov_base); eh->h_proto = htons(ETH_P_IP); *iph = (struct iphdr)L2_BUF_IP4_INIT(IPPROTO_UDP); l4len = udp_update_hdr4(c, iph, bp, toside, data_len); } else { struct ipv6hdr *ip6h = vu_ip(vdev, iov_vu[0].iov_base); struct udp_payload_t *bp = vu_payloadv6(vdev, iov_vu[0].iov_base); eh->h_proto = htons(ETH_P_IPV6); *ip6h = (struct ipv6hdr)L2_BUF_IP6_INIT(IPPROTO_UDP); l4len = udp_update_hdr6(c, ip6h, bp, toside, data_len); } return l4len; } static void udp_vu_pcap(const struct ctx *c, const struct flowside *toside, size_t l4len, int iov_used) { const struct in_addr *src = inany_v4(&toside->faddr); const struct in_addr *dst = inany_v4(&toside->eaddr); const struct vu_dev *vdev = c->vdev; char *base = iov_vu[0].iov_base; size_t size = iov_vu[0].iov_len; struct udp_payload_t *bp; uint32_t sum; if (!*c->pcap) return; if (src && dst) { bp = vu_payloadv4(vdev, base); sum = proto_ipv4_header_psum(l4len, IPPROTO_UDP, *src, *dst); } else { bp = vu_payloadv6(vdev, base); sum = proto_ipv6_header_psum(l4len, IPPROTO_UDP, &toside->faddr.a6, &toside->eaddr.a6); bp->uh.check = 0; /* by default, set to 0xffff */ } iov_vu[0].iov_base = &bp->uh; iov_vu[0].iov_len = size - ((char *)iov_vu[0].iov_base - base); bp->uh.check = csum_iov(iov_vu, iov_used, sum); /* set iov for pcap logging */ iov_vu[0].iov_base = base + vdev->hdrlen; iov_vu[0].iov_len = size - vdev->hdrlen; pcap_iov(iov_vu, iov_used); /* restore iov_vu[0] */ iov_vu[0].iov_base = base; iov_vu[0].iov_len = size; } void udp_vu_listen_sock_handler(const struct ctx *c, union epoll_ref ref, uint32_t events, const struct timespec *now) { struct vu_dev *vdev = c->vdev; struct vu_virtq *vq = &vdev->vq[VHOST_USER_RX_QUEUE]; bool v6 = ref.udp.v6; int i; for (i = 0; i < UDP_MAX_FRAMES; i++) { union sockaddr_inany s_in; flow_sidx_t batchsidx; uint8_t batchpif; ssize_t data_len; int iov_used; iov_used = udp_vu_sock_recv(c, &s_in, ref.fd, events, v6, &data_len); if (iov_used <= 0) return; batchsidx = udp_flow_from_sock(c, ref, &s_in, now); batchpif = pif_at_sidx(batchsidx); if (batchpif == PIF_TAP) { size_t l4len; l4len = udp_vu_prepare(c, flowside_at_sidx(batchsidx), data_len); udp_vu_pcap(c, flowside_at_sidx(batchsidx), l4len, iov_used); vu_send_frame(vdev, vq, elem, iov_vu, iov_used); } else if (flow_sidx_valid(batchsidx)) { flow_sidx_t fromsidx = flow_sidx_opposite(batchsidx); struct udp_flow *uflow = udp_at_sidx(batchsidx); flow_err(uflow, "No support for forwarding UDP from %s to %s", pif_name(pif_at_sidx(fromsidx)), pif_name(batchpif)); } else { debug("Discarding 1 datagram without flow"); } } } void udp_vu_reply_sock_handler(const struct ctx *c, union epoll_ref ref, uint32_t events, const struct timespec *now) { flow_sidx_t tosidx = flow_sidx_opposite(ref.flowside); const struct flowside *toside = flowside_at_sidx(tosidx); struct vu_dev *vdev = c->vdev; struct vu_virtq *vq = &vdev->vq[VHOST_USER_RX_QUEUE]; struct udp_flow *uflow = udp_at_sidx(ref.flowside); uint8_t topif = pif_at_sidx(tosidx); bool v6 = ref.udp.v6; int i; ASSERT(!c->no_udp && uflow); for (i = 0; i < UDP_MAX_FRAMES; i++) { union sockaddr_inany s_in; ssize_t data_len; int iov_used; iov_used = udp_vu_sock_recv(c, &s_in, ref.fd, events, v6, &data_len); if (iov_used <= 0) return; flow_trace(uflow, "Received 1 datagram on reply socket"); uflow->ts = now->tv_sec; if (topif == PIF_TAP) { size_t l4len; l4len = udp_vu_prepare(c, toside, data_len); udp_vu_pcap(c, toside, l4len, iov_used); vu_send_frame(vdev, vq, elem, iov_vu, iov_used); } else { uint8_t frompif = pif_at_sidx(ref.flowside); flow_err(uflow, "No support for forwarding UDP from %s to %s", pif_name(frompif), pif_name(topif)); } } }