From: Laurent Vivier <lvivier@redhat.com>
To: passt-dev@passt.top
Cc: Laurent Vivier <lvivier@redhat.com>
Subject: [PATCH v11 02/10] vhost-user: introduce virtio API
Date: Wed, 13 Nov 2024 09:04:01 +0100 [thread overview]
Message-ID: <20241113080411.476646-3-lvivier@redhat.com> (raw)
In-Reply-To: <20241113080411.476646-1-lvivier@redhat.com>
Add virtio.c and virtio.h that define the functions needed
to manage virtqueues.
Signed-off-by: Laurent Vivier <lvivier@redhat.com>
---
Makefile | 4 +-
util.h | 9 +
virtio.c | 665 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
virtio.h | 183 +++++++++++++++
4 files changed, 859 insertions(+), 2 deletions(-)
create mode 100644 virtio.c
create mode 100644 virtio.h
diff --git a/Makefile b/Makefile
index 258d298d4787..9b61a47e50fc 100644
--- a/Makefile
+++ b/Makefile
@@ -37,7 +37,7 @@ FLAGS += -DDUAL_STACK_SOCKETS=$(DUAL_STACK_SOCKETS)
PASST_SRCS = arch.c arp.c checksum.c conf.c dhcp.c dhcpv6.c flow.c fwd.c \
icmp.c igmp.c inany.c iov.c ip.c isolation.c lineread.c log.c mld.c \
ndp.c netlink.c packet.c passt.c pasta.c pcap.c pif.c tap.c tcp.c \
- tcp_buf.c tcp_splice.c udp.c udp_flow.c util.c
+ tcp_buf.c tcp_splice.c udp.c udp_flow.c util.c virtio.c
QRAP_SRCS = qrap.c
SRCS = $(PASST_SRCS) $(QRAP_SRCS)
@@ -47,7 +47,7 @@ PASST_HEADERS = arch.h arp.h checksum.h conf.h dhcp.h dhcpv6.h flow.h fwd.h \
flow_table.h icmp.h icmp_flow.h inany.h iov.h ip.h isolation.h \
lineread.h log.h ndp.h netlink.h packet.h passt.h pasta.h pcap.h pif.h \
siphash.h tap.h tcp.h tcp_buf.h tcp_conn.h tcp_internal.h tcp_splice.h \
- udp.h udp_flow.h util.h
+ udp.h udp_flow.h util.h virtio.h
HEADERS = $(PASST_HEADERS) seccomp.h
C := \#include <sys/random.h>\nint main(){int a=getrandom(0, 0, 0);}
diff --git a/util.h b/util.h
index 3616515ab3ae..57d601bd8e8d 100644
--- a/util.h
+++ b/util.h
@@ -144,7 +144,16 @@ static inline uint32_t ntohl_unaligned(const void *p)
return ntohl(val);
}
+static inline void barrier(void) { __asm__ __volatile__("" ::: "memory"); }
+#define smp_mb() do { barrier(); __atomic_thread_fence(__ATOMIC_SEQ_CST); } while (0)
+#define smp_mb_release() do { barrier(); __atomic_thread_fence(__ATOMIC_RELEASE); } while (0)
+#define smp_mb_acquire() do { barrier(); __atomic_thread_fence(__ATOMIC_ACQUIRE); } while (0)
+
+#define smp_wmb() smp_mb_release()
+#define smp_rmb() smp_mb_acquire()
+
#define NS_FN_STACK_SIZE (1024 * 1024) /* 1MiB */
+
int do_clone(int (*fn)(void *), char *stack_area, size_t stack_size, int flags,
void *arg);
#define NS_CALL(fn, arg) \
diff --git a/virtio.c b/virtio.c
new file mode 100644
index 000000000000..380590afbca3
--- /dev/null
+++ b/virtio.c
@@ -0,0 +1,665 @@
+// SPDX-License-Identifier: GPL-2.0-or-later AND BSD-3-Clause
+/*
+ * virtio API, vring and virtqueue functions definition
+ *
+ * Copyright Red Hat
+ * Author: Laurent Vivier <lvivier@redhat.com>
+ */
+
+/* Some parts copied from QEMU subprojects/libvhost-user/libvhost-user.c
+ * originally licensed under the following terms:
+ *
+ * --
+ *
+ * Copyright IBM, Corp. 2007
+ * Copyright (c) 2016 Red Hat, Inc.
+ *
+ * Authors:
+ * Anthony Liguori <aliguori@us.ibm.com>
+ * Marc-André Lureau <mlureau@redhat.com>
+ * Victor Kaplansky <victork@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or
+ * later. See the COPYING file in the top-level directory.
+ *
+ * Some parts copied from QEMU hw/virtio/virtio.c
+ * licensed under the following terms:
+ *
+ * Copyright IBM, Corp. 2007
+ *
+ * Authors:
+ * Anthony Liguori <aliguori@us.ibm.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ *
+ * --
+ *
+ * virtq_used_event() and virtq_avail_event() from
+ * https://docs.oasis-open.org/virtio/virtio/v1.2/csd01/virtio-v1.2-csd01.html#x1-712000A
+ * licensed under the following terms:
+ *
+ * --
+ *
+ * This header is BSD licensed so anyone can use the definitions
+ * to implement compatible drivers/servers.
+ *
+ * Copyright 2007, 2009, IBM Corporation
+ * Copyright 2011, Red Hat, Inc
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of IBM nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ‘‘AS IS’’ AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL IBM OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <stddef.h>
+#include <endian.h>
+#include <string.h>
+#include <errno.h>
+#include <sys/eventfd.h>
+#include <sys/socket.h>
+
+#include "util.h"
+#include "virtio.h"
+
+#define VIRTQUEUE_MAX_SIZE 1024
+
+/**
+ * vu_gpa_to_va() - Translate guest physical address to our virtual address.
+ * @dev: Vhost-user device
+ * @plen: Physical length to map (input), capped to region (output)
+ * @guest_addr: Guest physical address
+ *
+ * Return: virtual address in our address space of the guest physical address
+ */
+static void *vu_gpa_to_va(struct vu_dev *dev, uint64_t *plen, uint64_t guest_addr)
+{
+ unsigned int i;
+
+ if (*plen == 0)
+ return NULL;
+
+ /* Find matching memory region. */
+ for (i = 0; i < dev->nregions; i++) {
+ const struct vu_dev_region *r = &dev->regions[i];
+
+ if ((guest_addr >= r->gpa) &&
+ (guest_addr < (r->gpa + r->size))) {
+ if ((guest_addr + *plen) > (r->gpa + r->size))
+ *plen = r->gpa + r->size - guest_addr;
+ /* NOLINTNEXTLINE(performance-no-int-to-ptr) */
+ return (void *)(guest_addr - r->gpa + r->mmap_addr +
+ r->mmap_offset);
+ }
+ }
+
+ return NULL;
+}
+
+/**
+ * vring_avail_flags() - Read the available ring flags
+ * @vq: Virtqueue
+ *
+ * Return: the available ring descriptor flags of the given virtqueue
+ */
+static inline uint16_t vring_avail_flags(const struct vu_virtq *vq)
+{
+ return le16toh(vq->vring.avail->flags);
+}
+
+/**
+ * vring_avail_idx() - Read the available ring index
+ * @vq: Virtqueue
+ *
+ * Return: the available ring index of the given virtqueue
+ */
+static inline uint16_t vring_avail_idx(struct vu_virtq *vq)
+{
+ vq->shadow_avail_idx = le16toh(vq->vring.avail->idx);
+
+ return vq->shadow_avail_idx;
+}
+
+/**
+ * vring_avail_ring() - Read an available ring entry
+ * @vq: Virtqueue
+ * @i: Index of the entry to read
+ *
+ * Return: the ring entry content (head of the descriptor chain)
+ */
+static inline uint16_t vring_avail_ring(const struct vu_virtq *vq, int i)
+{
+ return le16toh(vq->vring.avail->ring[i]);
+}
+
+/**
+ * virtq_used_event - Get location of used event indices
+ * (only with VIRTIO_F_EVENT_IDX)
+ * @vq Virtqueue
+ *
+ * Return: return the location of the used event index
+ */
+static inline uint16_t *virtq_used_event(const struct vu_virtq *vq)
+{
+ /* For backwards compat, used event index is at *end* of avail ring. */
+ return &vq->vring.avail->ring[vq->vring.num];
+}
+
+/**
+ * vring_get_used_event() - Get the used event from the available ring
+ * @vq Virtqueue
+ *
+ * Return: the used event (available only if VIRTIO_RING_F_EVENT_IDX is set)
+ * used_event is a performant alternative where the driver
+ * specifies how far the device can progress before a notification
+ * is required.
+ */
+static inline uint16_t vring_get_used_event(const struct vu_virtq *vq)
+{
+ return le16toh(*virtq_used_event(vq));
+}
+
+/**
+ * virtqueue_get_head() - Get the head of the descriptor chain for a given
+ * index
+ * @vq: Virtqueue
+ * @idx: Available ring entry index
+ * @head: Head of the descriptor chain
+ */
+static void virtqueue_get_head(const struct vu_virtq *vq,
+ unsigned int idx, unsigned int *head)
+{
+ /* Grab the next descriptor number they're advertising, and increment
+ * the index we've seen.
+ */
+ *head = vring_avail_ring(vq, idx % vq->vring.num);
+
+ /* If their number is silly, that's a fatal mistake. */
+ if (*head >= vq->vring.num)
+ die("vhost-user: Guest says index %u is available", *head);
+}
+
+/**
+ * virtqueue_read_indirect_desc() - Copy virtio ring descriptors from guest
+ * memory
+ * @dev: Vhost-user device
+ * @desc: Destination address to copy the descriptors to
+ * @addr: Guest memory address to copy from
+ * @len: Length of memory to copy
+ *
+ * Return: -1 if there is an error, 0 otherwise
+ */
+static int virtqueue_read_indirect_desc(struct vu_dev *dev, struct vring_desc *desc,
+ uint64_t addr, size_t len)
+{
+ uint64_t read_len;
+
+ if (len > (VIRTQUEUE_MAX_SIZE * sizeof(struct vring_desc)))
+ return -1;
+
+ if (len == 0)
+ return -1;
+
+ while (len) {
+ const struct vring_desc *orig_desc;
+
+ read_len = len;
+ orig_desc = vu_gpa_to_va(dev, &read_len, addr);
+ if (!orig_desc)
+ return -1;
+
+ memcpy(desc, orig_desc, read_len);
+ len -= read_len;
+ addr += read_len;
+ desc += read_len / sizeof(struct vring_desc);
+ }
+
+ return 0;
+}
+
+/**
+ * enum virtqueue_read_desc_state - State in the descriptor chain
+ * @VIRTQUEUE_READ_DESC_ERROR Found an invalid descriptor
+ * @VIRTQUEUE_READ_DESC_DONE No more descriptors in the chain
+ * @VIRTQUEUE_READ_DESC_MORE there are more descriptors in the chain
+ */
+enum virtqueue_read_desc_state {
+ VIRTQUEUE_READ_DESC_ERROR = -1,
+ VIRTQUEUE_READ_DESC_DONE = 0, /* end of chain */
+ VIRTQUEUE_READ_DESC_MORE = 1, /* more buffers in chain */
+};
+
+/**
+ * virtqueue_read_next_desc() - Read the the next descriptor in the chain
+ * @desc: Virtio ring descriptors
+ * @i: Index of the current descriptor
+ * @max: Maximum value of the descriptor index
+ * @next: Index of the next descriptor in the chain (output value)
+ *
+ * Return: current chain descriptor state (error, next, done)
+ */
+static int virtqueue_read_next_desc(const struct vring_desc *desc,
+ int i, unsigned int max, unsigned int *next)
+{
+ /* If this descriptor says it doesn't chain, we're done. */
+ if (!(le16toh(desc[i].flags) & VRING_DESC_F_NEXT))
+ return VIRTQUEUE_READ_DESC_DONE;
+
+ /* Check they're not leading us off end of descriptors. */
+ *next = le16toh(desc[i].next);
+ /* Make sure compiler knows to grab that: we don't want it changing! */
+ smp_wmb();
+
+ if (*next >= max)
+ return VIRTQUEUE_READ_DESC_ERROR;
+
+ return VIRTQUEUE_READ_DESC_MORE;
+}
+
+/**
+ * vu_queue_empty() - Check if virtqueue is empty
+ * @vq: Virtqueue
+ *
+ * Return: true if the virtqueue is empty, false otherwise
+ */
+bool vu_queue_empty(struct vu_virtq *vq)
+{
+ if (!vq->vring.avail)
+ return true;
+
+ if (vq->shadow_avail_idx != vq->last_avail_idx)
+ return false;
+
+ return vring_avail_idx(vq) == vq->last_avail_idx;
+}
+
+/**
+ * vring_can_notify() - Check if a notification can be sent
+ * @dev: Vhost-user device
+ * @vq: Virtqueue
+ *
+ * Return: true if notification can be sent
+ */
+static bool vring_can_notify(const struct vu_dev *dev, struct vu_virtq *vq)
+{
+ uint16_t old, new;
+ bool v;
+
+ /* We need to expose used array entries before checking used event. */
+ smp_mb();
+
+ /* Always notify when queue is empty (when feature acknowledge) */
+ if (vu_has_feature(dev, VIRTIO_F_NOTIFY_ON_EMPTY) &&
+ !vq->inuse && vu_queue_empty(vq))
+ return true;
+
+ if (!vu_has_feature(dev, VIRTIO_RING_F_EVENT_IDX))
+ return !(vring_avail_flags(vq) & VRING_AVAIL_F_NO_INTERRUPT);
+
+ v = vq->signalled_used_valid;
+ vq->signalled_used_valid = true;
+ old = vq->signalled_used;
+ new = vq->signalled_used = vq->used_idx;
+ return !v || vring_need_event(vring_get_used_event(vq), new, old);
+}
+
+/**
+ * vu_queue_notify() - Send a notification to the given virtqueue
+ * @dev: Vhost-user device
+ * @vq: Virtqueue
+ */
+/* cppcheck-suppress unusedFunction */
+void vu_queue_notify(const struct vu_dev *dev, struct vu_virtq *vq)
+{
+ if (!vq->vring.avail)
+ return;
+
+ if (!vring_can_notify(dev, vq)) {
+ debug("vhost-user: virtqueue can skip notify...");
+ return;
+ }
+
+ if (eventfd_write(vq->call_fd, 1) < 0)
+ die_perror("Error writing vhost-user queue eventfd");
+}
+
+/* virtq_avail_event() - Get location of available event indices
+ * (only with VIRTIO_F_EVENT_IDX)
+ * @vq: Virtqueue
+ *
+ * Return: return the location of the available event index
+ */
+static inline uint16_t *virtq_avail_event(const struct vu_virtq *vq)
+{
+ /* For backwards compat, avail event index is at *end* of used ring. */
+ return (uint16_t *)&vq->vring.used->ring[vq->vring.num];
+}
+
+/**
+ * vring_set_avail_event() - Set avail_event
+ * @vq: Virtqueue
+ * @val: Value to set to avail_event
+ * avail_event is used in the same way the used_event is in the
+ * avail_ring.
+ * avail_event is used to advise the driver that notifications
+ * are unnecessary until the driver writes entry with an index
+ * specified by avail_event into the available ring.
+ */
+static inline void vring_set_avail_event(const struct vu_virtq *vq,
+ uint16_t val)
+{
+ uint16_t val_le = htole16(val);
+
+ if (!vq->notification)
+ return;
+
+ memcpy(virtq_avail_event(vq), &val_le, sizeof(val_le));
+}
+
+/**
+ * virtqueue_map_desc() - Translate descriptor ring physical address into our
+ * virtual address space
+ * @dev: Vhost-user device
+ * @p_num_sg: First iov entry to use (input),
+ * first iov entry not used (output)
+ * @iov: Iov array to use to store buffer virtual addresses
+ * @max_num_sg: Maximum number of iov entries
+ * @pa: Guest physical address of the buffer to map into our virtual
+ * address
+ * @sz: Size of the buffer
+ *
+ * Return: false on error, true otherwise
+ */
+static bool virtqueue_map_desc(struct vu_dev *dev,
+ unsigned int *p_num_sg, struct iovec *iov,
+ unsigned int max_num_sg,
+ uint64_t pa, size_t sz)
+{
+ unsigned int num_sg = *p_num_sg;
+
+ ASSERT(num_sg < max_num_sg);
+ ASSERT(sz);
+
+ while (sz) {
+ uint64_t len = sz;
+
+ iov[num_sg].iov_base = vu_gpa_to_va(dev, &len, pa);
+ if (iov[num_sg].iov_base == NULL)
+ die("vhost-user: invalid address for buffers");
+ iov[num_sg].iov_len = len;
+ num_sg++;
+ sz -= len;
+ pa += len;
+ }
+
+ *p_num_sg = num_sg;
+ return true;
+}
+
+/**
+ * vu_queue_map_desc - Map the virtqueue descriptor ring into our virtual
+ * address space
+ * @dev: Vhost-user device
+ * @vq: Virtqueue
+ * @idx: First descriptor ring entry to map
+ * @elem: Virtqueue element to store descriptor ring iov
+ *
+ * Return: -1 if there is an error, 0 otherwise
+ */
+static int vu_queue_map_desc(struct vu_dev *dev, struct vu_virtq *vq, unsigned int idx,
+ struct vu_virtq_element *elem)
+{
+ const struct vring_desc *desc = vq->vring.desc;
+ struct vring_desc desc_buf[VIRTQUEUE_MAX_SIZE];
+ unsigned int out_num = 0, in_num = 0;
+ unsigned int max = vq->vring.num;
+ unsigned int i = idx;
+ uint64_t read_len;
+ int rc;
+
+ if (le16toh(desc[i].flags) & VRING_DESC_F_INDIRECT) {
+ unsigned int desc_len;
+ uint64_t desc_addr;
+
+ if (le32toh(desc[i].len) % sizeof(struct vring_desc))
+ die("vhost-user: Invalid size for indirect buffer table");
+
+ /* loop over the indirect descriptor table */
+ desc_addr = le64toh(desc[i].addr);
+ desc_len = le32toh(desc[i].len);
+ max = desc_len / sizeof(struct vring_desc);
+ read_len = desc_len;
+ desc = vu_gpa_to_va(dev, &read_len, desc_addr);
+ if (desc && read_len != desc_len) {
+ /* Failed to use zero copy */
+ desc = NULL;
+ if (!virtqueue_read_indirect_desc(dev, desc_buf, desc_addr, desc_len))
+ desc = desc_buf;
+ }
+ if (!desc)
+ die("vhost-user: Invalid indirect buffer table");
+ i = 0;
+ }
+
+ /* Collect all the descriptors */
+ do {
+ if (le16toh(desc[i].flags) & VRING_DESC_F_WRITE) {
+ if (!virtqueue_map_desc(dev, &in_num, elem->in_sg,
+ elem->in_num,
+ le64toh(desc[i].addr),
+ le32toh(desc[i].len)))
+ return -1;
+ } else {
+ if (in_num)
+ die("Incorrect order for descriptors");
+ if (!virtqueue_map_desc(dev, &out_num, elem->out_sg,
+ elem->out_num,
+ le64toh(desc[i].addr),
+ le32toh(desc[i].len))) {
+ return -1;
+ }
+ }
+
+ /* If we've got too many, that implies a descriptor loop. */
+ if ((in_num + out_num) > max)
+ die("vhost-user: Loop in queue descriptor list");
+ rc = virtqueue_read_next_desc(desc, i, max, &i);
+ } while (rc == VIRTQUEUE_READ_DESC_MORE);
+
+ if (rc == VIRTQUEUE_READ_DESC_ERROR)
+ die("vhost-user: Failed to read descriptor list");
+
+ elem->index = idx;
+ elem->in_num = in_num;
+ elem->out_num = out_num;
+
+ return 0;
+}
+
+/**
+ * vu_queue_pop() - Pop an entry from the virtqueue
+ * @dev: Vhost-user device
+ * @vq: Virtqueue
+ * @elem: Virtqueue element to file with the entry information
+ *
+ * Return: -1 if there is an error, 0 otherwise
+ */
+/* cppcheck-suppress unusedFunction */
+int vu_queue_pop(struct vu_dev *dev, struct vu_virtq *vq, struct vu_virtq_element *elem)
+{
+ unsigned int head;
+ int ret;
+
+ if (!vq->vring.avail)
+ return -1;
+
+ if (vu_queue_empty(vq))
+ return -1;
+
+ /* Needed after vu_queue_empty(), see comment in
+ * virtqueue_num_heads().
+ */
+ smp_rmb();
+
+ if (vq->inuse >= vq->vring.num)
+ die("vhost-user queue size exceeded");
+
+ virtqueue_get_head(vq, vq->last_avail_idx++, &head);
+
+ if (vu_has_feature(dev, VIRTIO_RING_F_EVENT_IDX))
+ vring_set_avail_event(vq, vq->last_avail_idx);
+
+ ret = vu_queue_map_desc(dev, vq, head, elem);
+
+ if (ret < 0)
+ return ret;
+
+ vq->inuse++;
+
+ return 0;
+}
+
+/**
+ * vu_queue_detach_element() - Detach an element from the virqueue
+ * @vq: Virtqueue
+ */
+void vu_queue_detach_element(struct vu_virtq *vq)
+{
+ vq->inuse--;
+ /* unmap, when DMA support is added */
+}
+
+/**
+ * vu_queue_unpop() - Push back the previously popped element from the virqueue
+ * @vq: Virtqueue
+ */
+/* cppcheck-suppress unusedFunction */
+void vu_queue_unpop(struct vu_virtq *vq)
+{
+ vq->last_avail_idx--;
+ vu_queue_detach_element(vq);
+}
+
+/**
+ * vu_queue_rewind() - Push back a given number of popped elements
+ * @vq: Virtqueue
+ * @num: Number of element to unpop
+ */
+/* cppcheck-suppress unusedFunction */
+bool vu_queue_rewind(struct vu_virtq *vq, unsigned int num)
+{
+ if (num > vq->inuse)
+ return false;
+
+ vq->last_avail_idx -= num;
+ vq->inuse -= num;
+ return true;
+}
+
+/**
+ * vring_used_write() - Write an entry in the used ring
+ * @vq: Virtqueue
+ * @uelem: Entry to write
+ * @i: Index of the entry in the used ring
+ */
+static inline void vring_used_write(struct vu_virtq *vq,
+ const struct vring_used_elem *uelem, int i)
+{
+ struct vring_used *used = vq->vring.used;
+
+ used->ring[i] = *uelem;
+}
+
+/**
+ * vu_queue_fill_by_index() - Update information of a descriptor ring entry
+ * in the used ring
+ * @vq: Virtqueue
+ * @index: Descriptor ring index
+ * @len: Size of the element
+ * @idx: Used ring entry index
+ */
+void vu_queue_fill_by_index(struct vu_virtq *vq, unsigned int index,
+ unsigned int len, unsigned int idx)
+{
+ struct vring_used_elem uelem;
+
+ if (!vq->vring.avail)
+ return;
+
+ idx = (idx + vq->used_idx) % vq->vring.num;
+
+ uelem.id = htole32(index);
+ uelem.len = htole32(len);
+ vring_used_write(vq, &uelem, idx);
+}
+
+/**
+ * vu_queue_fill() - Update information of a given element in the used ring
+ * @dev: Vhost-user device
+ * @vq: Virtqueue
+ * @elem: Element information to fill
+ * @len: Size of the element
+ * @idx: Used ring entry index
+ */
+/* cppcheck-suppress unusedFunction */
+void vu_queue_fill(struct vu_virtq *vq, const struct vu_virtq_element *elem,
+ unsigned int len, unsigned int idx)
+{
+ vu_queue_fill_by_index(vq, elem->index, len, idx);
+}
+
+/**
+ * vring_used_idx_set() - Set the descriptor ring current index
+ * @vq: Virtqueue
+ * @val: Value to set in the index
+ */
+static inline void vring_used_idx_set(struct vu_virtq *vq, uint16_t val)
+{
+ vq->vring.used->idx = htole16(val);
+
+ vq->used_idx = val;
+}
+
+/**
+ * vu_queue_flush() - Flush the virtqueue
+ * @vq: Virtqueue
+ * @count: Number of entry to flush
+ */
+/* cppcheck-suppress unusedFunction */
+void vu_queue_flush(struct vu_virtq *vq, unsigned int count)
+{
+ uint16_t old, new;
+
+ if (!vq->vring.avail)
+ return;
+
+ /* Make sure buffer is written before we update index. */
+ smp_wmb();
+
+ old = vq->used_idx;
+ new = old + count;
+ vring_used_idx_set(vq, new);
+ vq->inuse -= count;
+ if ((uint16_t)(new - vq->signalled_used) < (uint16_t)(new - old))
+ vq->signalled_used_valid = false;
+}
diff --git a/virtio.h b/virtio.h
new file mode 100644
index 000000000000..94efeb049fbc
--- /dev/null
+++ b/virtio.h
@@ -0,0 +1,183 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * virtio API, vring and virtqueue functions definition
+ *
+ * Copyright Red Hat
+ * Author: Laurent Vivier <lvivier@redhat.com>
+ */
+
+#ifndef VIRTIO_H
+#define VIRTIO_H
+
+#include <stdbool.h>
+#include <linux/vhost_types.h>
+
+/* Maximum size of a virtqueue */
+#define VIRTQUEUE_MAX_SIZE 1024
+
+/**
+ * struct vu_ring - Virtqueue rings
+ * @num: Size of the queue
+ * @desc: Descriptor ring
+ * @avail: Available ring
+ * @used: Used ring
+ * @log_guest_addr: Guest address for logging
+ * @flags: Vring flags
+ * VHOST_VRING_F_LOG is set if log address is valid
+ */
+struct vu_ring {
+ unsigned int num;
+ struct vring_desc *desc;
+ struct vring_avail *avail;
+ struct vring_used *used;
+ uint64_t log_guest_addr;
+ uint32_t flags;
+};
+
+/**
+ * struct vu_virtq - Virtqueue definition
+ * @vring: Virtqueue rings
+ * @last_avail_idx: Next head to pop
+ * @shadow_avail_idx: Last avail_idx read from VQ.
+ * @used_idx: Descriptor ring current index
+ * @signalled_used: Last used index value we have signalled on
+ * @signalled_used_valid: True if signalled_used if valid
+ * @notification: True if the queues notify (via event
+ * index or interrupt)
+ * @inuse: Number of entries in use
+ * @call_fd: The event file descriptor to signal when
+ * buffers are used.
+ * @kick_fd: The event file descriptor for adding
+ * buffers to the vring
+ * @err_fd: The event file descriptor to signal when
+ * error occurs
+ * @enable: True if the virtqueue is enabled
+ * @started: True if the virtqueue is started
+ * @vra: QEMU address of our rings
+ */
+struct vu_virtq {
+ struct vu_ring vring;
+ uint16_t last_avail_idx;
+ uint16_t shadow_avail_idx;
+ uint16_t used_idx;
+ uint16_t signalled_used;
+ bool signalled_used_valid;
+ bool notification;
+ unsigned int inuse;
+ int call_fd;
+ int kick_fd;
+ int err_fd;
+ unsigned int enable;
+ bool started;
+ struct vhost_vring_addr vra;
+};
+
+/**
+ * struct vu_dev_region - guest shared memory region
+ * @gpa: Guest physical address of the region
+ * @size: Memory size in bytes
+ * @qva: QEMU virtual address
+ * @mmap_offset: Offset where the region starts in the mapped memory
+ * @mmap_addr: Address of the mapped memory
+ */
+struct vu_dev_region {
+ uint64_t gpa;
+ uint64_t size;
+ uint64_t qva;
+ uint64_t mmap_offset;
+ uint64_t mmap_addr;
+};
+
+#define VHOST_USER_MAX_QUEUES 2
+
+/*
+ * Set a reasonable maximum number of ram slots, which will be supported by
+ * any architecture.
+ */
+#define VHOST_USER_MAX_RAM_SLOTS 32
+
+/**
+ * struct vu_dev - vhost-user device information
+ * @context: Execution context
+ * @nregions: Number of shared memory regions
+ * @regions: Guest shared memory regions
+ * @features: Vhost-user features
+ * @protocol_features: Vhost-user protocol features
+ */
+struct vu_dev {
+ uint32_t nregions;
+ struct vu_dev_region regions[VHOST_USER_MAX_RAM_SLOTS];
+ struct vu_virtq vq[VHOST_USER_MAX_QUEUES];
+ uint64_t features;
+ uint64_t protocol_features;
+};
+
+/**
+ * struct vu_virtq_element - virtqueue element
+ * @index: Descriptor ring index
+ * @out_num: Number of outgoing iovec buffers
+ * @in_num: Number of incoming iovec buffers
+ * @in_sg: Incoming iovec buffers
+ * @out_sg: Outgoing iovec buffers
+ */
+struct vu_virtq_element {
+ unsigned int index;
+ unsigned int out_num;
+ unsigned int in_num;
+ struct iovec *in_sg;
+ struct iovec *out_sg;
+};
+
+/**
+ * has_feature() - Check a feature bit in a features set
+ * @features: Features set
+ * @fb: Feature bit to check
+ *
+ * Return: True if the feature bit is set
+ */
+static inline bool has_feature(uint64_t features, unsigned int fbit)
+{
+ return !!(features & (1ULL << fbit));
+}
+
+/**
+ * vu_has_feature() - Check if a virtio-net feature is available
+ * @vdev: Vhost-user device
+ * @bit: Feature to check
+ *
+ * Return: True if the feature is available
+ */
+static inline bool vu_has_feature(const struct vu_dev *vdev,
+ unsigned int fbit)
+{
+ return has_feature(vdev->features, fbit);
+}
+
+/**
+ * vu_has_protocol_feature() - Check if a vhost-user feature is available
+ * @vdev: Vhost-user device
+ * @bit: Feature to check
+ *
+ * Return: True if the feature is available
+ */
+/* cppcheck-suppress unusedFunction */
+static inline bool vu_has_protocol_feature(const struct vu_dev *vdev,
+ unsigned int fbit)
+{
+ return has_feature(vdev->protocol_features, fbit);
+}
+
+bool vu_queue_empty(struct vu_virtq *vq);
+void vu_queue_notify(const struct vu_dev *dev, struct vu_virtq *vq);
+int vu_queue_pop(struct vu_dev *dev, struct vu_virtq *vq,
+ struct vu_virtq_element *elem);
+void vu_queue_detach_element(struct vu_virtq *vq);
+void vu_queue_unpop(struct vu_virtq *vq);
+bool vu_queue_rewind(struct vu_virtq *vq, unsigned int num);
+void vu_queue_fill_by_index(struct vu_virtq *vq, unsigned int index,
+ unsigned int len, unsigned int idx);
+void vu_queue_fill(struct vu_virtq *vq,
+ const struct vu_virtq_element *elem, unsigned int len,
+ unsigned int idx);
+void vu_queue_flush(struct vu_virtq *vq, unsigned int count);
+#endif /* VIRTIO_H */
--
@@ -0,0 +1,183 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * virtio API, vring and virtqueue functions definition
+ *
+ * Copyright Red Hat
+ * Author: Laurent Vivier <lvivier@redhat.com>
+ */
+
+#ifndef VIRTIO_H
+#define VIRTIO_H
+
+#include <stdbool.h>
+#include <linux/vhost_types.h>
+
+/* Maximum size of a virtqueue */
+#define VIRTQUEUE_MAX_SIZE 1024
+
+/**
+ * struct vu_ring - Virtqueue rings
+ * @num: Size of the queue
+ * @desc: Descriptor ring
+ * @avail: Available ring
+ * @used: Used ring
+ * @log_guest_addr: Guest address for logging
+ * @flags: Vring flags
+ * VHOST_VRING_F_LOG is set if log address is valid
+ */
+struct vu_ring {
+ unsigned int num;
+ struct vring_desc *desc;
+ struct vring_avail *avail;
+ struct vring_used *used;
+ uint64_t log_guest_addr;
+ uint32_t flags;
+};
+
+/**
+ * struct vu_virtq - Virtqueue definition
+ * @vring: Virtqueue rings
+ * @last_avail_idx: Next head to pop
+ * @shadow_avail_idx: Last avail_idx read from VQ.
+ * @used_idx: Descriptor ring current index
+ * @signalled_used: Last used index value we have signalled on
+ * @signalled_used_valid: True if signalled_used if valid
+ * @notification: True if the queues notify (via event
+ * index or interrupt)
+ * @inuse: Number of entries in use
+ * @call_fd: The event file descriptor to signal when
+ * buffers are used.
+ * @kick_fd: The event file descriptor for adding
+ * buffers to the vring
+ * @err_fd: The event file descriptor to signal when
+ * error occurs
+ * @enable: True if the virtqueue is enabled
+ * @started: True if the virtqueue is started
+ * @vra: QEMU address of our rings
+ */
+struct vu_virtq {
+ struct vu_ring vring;
+ uint16_t last_avail_idx;
+ uint16_t shadow_avail_idx;
+ uint16_t used_idx;
+ uint16_t signalled_used;
+ bool signalled_used_valid;
+ bool notification;
+ unsigned int inuse;
+ int call_fd;
+ int kick_fd;
+ int err_fd;
+ unsigned int enable;
+ bool started;
+ struct vhost_vring_addr vra;
+};
+
+/**
+ * struct vu_dev_region - guest shared memory region
+ * @gpa: Guest physical address of the region
+ * @size: Memory size in bytes
+ * @qva: QEMU virtual address
+ * @mmap_offset: Offset where the region starts in the mapped memory
+ * @mmap_addr: Address of the mapped memory
+ */
+struct vu_dev_region {
+ uint64_t gpa;
+ uint64_t size;
+ uint64_t qva;
+ uint64_t mmap_offset;
+ uint64_t mmap_addr;
+};
+
+#define VHOST_USER_MAX_QUEUES 2
+
+/*
+ * Set a reasonable maximum number of ram slots, which will be supported by
+ * any architecture.
+ */
+#define VHOST_USER_MAX_RAM_SLOTS 32
+
+/**
+ * struct vu_dev - vhost-user device information
+ * @context: Execution context
+ * @nregions: Number of shared memory regions
+ * @regions: Guest shared memory regions
+ * @features: Vhost-user features
+ * @protocol_features: Vhost-user protocol features
+ */
+struct vu_dev {
+ uint32_t nregions;
+ struct vu_dev_region regions[VHOST_USER_MAX_RAM_SLOTS];
+ struct vu_virtq vq[VHOST_USER_MAX_QUEUES];
+ uint64_t features;
+ uint64_t protocol_features;
+};
+
+/**
+ * struct vu_virtq_element - virtqueue element
+ * @index: Descriptor ring index
+ * @out_num: Number of outgoing iovec buffers
+ * @in_num: Number of incoming iovec buffers
+ * @in_sg: Incoming iovec buffers
+ * @out_sg: Outgoing iovec buffers
+ */
+struct vu_virtq_element {
+ unsigned int index;
+ unsigned int out_num;
+ unsigned int in_num;
+ struct iovec *in_sg;
+ struct iovec *out_sg;
+};
+
+/**
+ * has_feature() - Check a feature bit in a features set
+ * @features: Features set
+ * @fb: Feature bit to check
+ *
+ * Return: True if the feature bit is set
+ */
+static inline bool has_feature(uint64_t features, unsigned int fbit)
+{
+ return !!(features & (1ULL << fbit));
+}
+
+/**
+ * vu_has_feature() - Check if a virtio-net feature is available
+ * @vdev: Vhost-user device
+ * @bit: Feature to check
+ *
+ * Return: True if the feature is available
+ */
+static inline bool vu_has_feature(const struct vu_dev *vdev,
+ unsigned int fbit)
+{
+ return has_feature(vdev->features, fbit);
+}
+
+/**
+ * vu_has_protocol_feature() - Check if a vhost-user feature is available
+ * @vdev: Vhost-user device
+ * @bit: Feature to check
+ *
+ * Return: True if the feature is available
+ */
+/* cppcheck-suppress unusedFunction */
+static inline bool vu_has_protocol_feature(const struct vu_dev *vdev,
+ unsigned int fbit)
+{
+ return has_feature(vdev->protocol_features, fbit);
+}
+
+bool vu_queue_empty(struct vu_virtq *vq);
+void vu_queue_notify(const struct vu_dev *dev, struct vu_virtq *vq);
+int vu_queue_pop(struct vu_dev *dev, struct vu_virtq *vq,
+ struct vu_virtq_element *elem);
+void vu_queue_detach_element(struct vu_virtq *vq);
+void vu_queue_unpop(struct vu_virtq *vq);
+bool vu_queue_rewind(struct vu_virtq *vq, unsigned int num);
+void vu_queue_fill_by_index(struct vu_virtq *vq, unsigned int index,
+ unsigned int len, unsigned int idx);
+void vu_queue_fill(struct vu_virtq *vq,
+ const struct vu_virtq_element *elem, unsigned int len,
+ unsigned int idx);
+void vu_queue_flush(struct vu_virtq *vq, unsigned int count);
+#endif /* VIRTIO_H */
--
2.47.0
next prev parent reply other threads:[~2024-11-13 8:04 UTC|newest]
Thread overview: 13+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-11-13 8:03 [PATCH v11 00/10] Add vhost-user support to passt. (part 3) Laurent Vivier
2024-11-13 8:04 ` [PATCH v11 01/10] packet: replace struct desc by struct iovec Laurent Vivier
2024-11-13 8:04 ` Laurent Vivier [this message]
2024-11-13 8:04 ` [PATCH v11 03/10] vhost-user: introduce vhost-user API Laurent Vivier
2024-11-13 8:04 ` [PATCH v11 04/10] udp: Prepare udp.c to be shared with vhost-user Laurent Vivier
2024-11-13 8:04 ` [PATCH v11 05/10] tcp: Export headers functions Laurent Vivier
2024-11-13 8:04 ` [PATCH v11 06/10] passt: rename tap_sock_init() to tap_backend_init() Laurent Vivier
2024-11-13 8:04 ` [PATCH v11 07/10] vhost-user: add vhost-user Laurent Vivier
2024-11-13 8:04 ` [PATCH v11 08/10] test: Add tests for passt in vhost-user mode Laurent Vivier
2024-11-13 8:04 ` [PATCH v11 09/10] tcp_vu: Share more header construction between IPv4 and IPv6 paths Laurent Vivier
2024-11-13 8:04 ` [PATCH v11 10/10] tcp: Move tcp_l2_buf_fill_headers() to tcp_buf.c Laurent Vivier
2024-11-13 9:22 ` [PATCH v11 00/10] Add vhost-user support to passt. (part 3) Stefano Brivio
2024-11-13 9:48 ` Laurent Vivier
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20241113080411.476646-3-lvivier@redhat.com \
--to=lvivier@redhat.com \
--cc=passt-dev@passt.top \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
Code repositories for project(s) associated with this public inbox
https://passt.top/passt
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for IMAP folder(s).