From: David Gibson <david@gibson.dropbear.id.au>
To: Laurent Vivier <lvivier@redhat.com>
Cc: passt-dev@passt.top
Subject: Re: [PATCH v2 2/4] vhost-user: introduce virtio API
Date: Wed, 17 Jul 2024 15:21:32 +1000 [thread overview]
Message-ID: <ZpdU3HWHOe3YPWkZ@zatzit> (raw)
In-Reply-To: <20240712153244.831436-3-lvivier@redhat.com>
[-- Attachment #1: Type: text/plain, Size: 29066 bytes --]
On Fri, Jul 12, 2024 at 05:32:42PM +0200, Laurent Vivier wrote:
> Add virtio.c and virtio.h that define the functions needed
> to manage virtqueues.
>
> Signed-off-by: Laurent Vivier <lvivier@redhat.com>
> ---
> Makefile | 4 +-
> util.h | 11 +
> virtio.c | 611 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
> virtio.h | 190 +++++++++++++++++
> 4 files changed, 814 insertions(+), 2 deletions(-)
> create mode 100644 virtio.c
> create mode 100644 virtio.h
>
> diff --git a/Makefile b/Makefile
> index 09fc461d087e..39613a7cf1f2 100644
> --- a/Makefile
> +++ b/Makefile
> @@ -47,7 +47,7 @@ FLAGS += -DDUAL_STACK_SOCKETS=$(DUAL_STACK_SOCKETS)
> PASST_SRCS = arch.c arp.c checksum.c conf.c dhcp.c dhcpv6.c flow.c fwd.c \
> icmp.c igmp.c inany.c iov.c ip.c isolation.c lineread.c log.c mld.c \
> ndp.c netlink.c packet.c passt.c pasta.c pcap.c pif.c tap.c tcp.c \
> - tcp_buf.c tcp_splice.c udp.c util.c
> + tcp_buf.c tcp_splice.c udp.c util.c virtio.c
> QRAP_SRCS = qrap.c
> SRCS = $(PASST_SRCS) $(QRAP_SRCS)
>
> @@ -57,7 +57,7 @@ PASST_HEADERS = arch.h arp.h checksum.h conf.h dhcp.h dhcpv6.h flow.h fwd.h \
> flow_table.h icmp.h icmp_flow.h inany.h iov.h ip.h isolation.h \
> lineread.h log.h ndp.h netlink.h packet.h passt.h pasta.h pcap.h pif.h \
> siphash.h tap.h tcp.h tcp_buf.h tcp_conn.h tcp_internal.h tcp_splice.h \
> - udp.h util.h
> + udp.h util.h virtio.h
> HEADERS = $(PASST_HEADERS) seccomp.h
>
> C := \#include <linux/tcp.h>\nstruct tcp_info x = { .tcpi_snd_wnd = 0 };
> diff --git a/util.h b/util.h
> index eebb027be487..56c4e2e7b4fe 100644
> --- a/util.h
> +++ b/util.h
> @@ -48,6 +48,9 @@
> #define ROUND_DOWN(x, y) ((x) & ~((y) - 1))
> #define ROUND_UP(x, y) (((x) + (y) - 1) & ~((y) - 1))
>
> +#define ALIGN_DOWN(n, m) ((n) / (m) * (m))
> +#define ALIGN_UP(n, m) ALIGN_DOWN((n) + (m) - 1, (m))
Hrm. Aren't these equivalent to the ROUND_{UP,DOWN}() macros above.
Or rather, I think the ALIGN versions are more general, since they'll
work with y/m values that aren't powers of 2. I don't see any reason
to have two versions, though, since I'm fairly confident the compiler
will be able to convert the more general version to the more specific
one as necessary.
> #define MAX_FROM_BITS(n) (((1U << (n)) - 1))
>
> #define BIT(n) (1UL << (n))
> @@ -116,6 +119,14 @@
> #define htonl_constant(x) (__bswap_constant_32(x))
> #endif
>
> +static inline void barrier(void) { __asm__ __volatile__("" ::: "memory"); }
> +#define smp_mb() do { barrier(); __atomic_thread_fence(__ATOMIC_SEQ_CST); } while (0)
> +#define smp_mb_release() do { barrier(); __atomic_thread_fence(__ATOMIC_RELEASE); } while (0)
> +#define smp_mb_acquire() do { barrier(); __atomic_thread_fence(__ATOMIC_ACQUIRE); } while (0)
> +
> +#define smp_wmb() smp_mb_release()
> +#define smp_rmb() smp_mb_acquire()
> +
> #define NS_FN_STACK_SIZE (RLIMIT_STACK_VAL * 1024 / 8)
> int do_clone(int (*fn)(void *), char *stack_area, size_t stack_size, int flags,
> void *arg);
> diff --git a/virtio.c b/virtio.c
> new file mode 100644
> index 000000000000..5f984f92cae0
> --- /dev/null
> +++ b/virtio.c
> @@ -0,0 +1,611 @@
> +/* SPDX-License-Identifier: GPL-2.0-or-later
> + * Copyright Red Hat
> + * Author: Laurent Vivier <lvivier@redhat.com>
> + *
> + * virtio API, vring and virtqueue functions definition
> + */
Nit: the convention in post passt source files is, SPDX, then
description of this file, then copyright and authorship.
> +
> +/* some parts copied from QEMU subprojects/libvhost-user/libvhost-user.c */
> +
> +#include <stddef.h>
> +#include <endian.h>
> +#include <string.h>
> +#include <errno.h>
> +#include <sys/eventfd.h>
> +#include <sys/socket.h>
> +
> +#include "util.h"
> +#include "virtio.h"
> +
> +#define VIRTQUEUE_MAX_SIZE 1024
> +
> +/**
> + * vu_gpa_to_va() - Translate guest physical address to our virtual address.
> + * @dev: Vhost-user device
> + * @plen: Physical length to map (input), virtual address mapped (output)
> + * @guest_addr: Guest physical address
> + *
> + * Return: virtual address in our address space of the guest physical address
> + */
> +static void *vu_gpa_to_va(struct vu_dev *dev, uint64_t *plen, uint64_t guest_addr)
> +{
> + unsigned int i;
> +
> + if (*plen == 0)
> + return NULL;
> +
> + /* Find matching memory region. */
> + for (i = 0; i < dev->nregions; i++) {
> + const struct vu_dev_region *r = &dev->regions[i];
> +
> + if ((guest_addr >= r->gpa) &&
> + (guest_addr < (r->gpa + r->size))) {
> + if ((guest_addr + *plen) > (r->gpa + r->size))
> + *plen = r->gpa + r->size - guest_addr;
> + /* NOLINTNEXTLINE(performance-no-int-to-ptr) */
> + return (void *)(guest_addr - r->gpa + r->mmap_addr +
> + r->mmap_offset);
> + }
> + }
> +
> + return NULL;
> +}
> +
> +/**
> + * vring_avail_flags() - Read the available ring flags
> + * @vq: Virtqueue
> + *
> + * Return: the available ring descriptor flags of the given virtqueue
> + */
> +static inline uint16_t vring_avail_flags(const struct vu_virtq *vq)
> +{
> + return le16toh(vq->vring.avail->flags);
> +}
> +
> +/**
> + * vring_avail_idx() - Read the available ring index
> + * @vq: Virtqueue
> + *
> + * Return: the available ring index of the given virtqueue
> + */
> +static inline uint16_t vring_avail_idx(struct vu_virtq *vq)
> +{
> + vq->shadow_avail_idx = le16toh(vq->vring.avail->idx);
> +
> + return vq->shadow_avail_idx;
> +}
> +
> +/**
> + * vring_avail_ring() - Read an available ring entry
> + * @vq: Virtqueue
> + * @i Index of the entry to read
> + *
> + * Return: the ring entry content (head of the descriptor chain)
> + */
> +static inline uint16_t vring_avail_ring(const struct vu_virtq *vq, int i)
> +{
> + return le16toh(vq->vring.avail->ring[i]);
> +}
> +
> +/**
> + * vring_get_used_event() - Get the used event from the available ring
> + * @vq Virtqueue
> + *
> + * Return: the used event (available only if VIRTIO_RING_F_EVENT_IDX is set)
> + * used_event is a performant alternative where the driver
> + * specifies how far the device can progress before a notification
> + * is required. In this case, virq_avail is defined as:
> + * struct virtq_avail {
> + * le16 flags;
> + * le16 idx;
> + * le16 ring[num];
> + * le16 used_event; // Only if VIRTIO_F_EVENT_IDX
> + * };
> + * If the idx field in the used ring (which determined where that
> + * descriptor index was placed) was equal to used_event, the device
> + * must send a notification.
> + * Otherwise the device should not send a notification.
> + */
> +static inline uint16_t vring_get_used_event(const struct vu_virtq *vq)
> +{
> + return vring_avail_ring(vq, vq->vring.num);
> +}
> +
> +/**
> + * virtqueue_get_head() - Get the head of the descriptor chain for a given
> + * index
> + * @vq: Virtqueue
> + * @idx: Available ring entry index
> + * @head: Head of the descriptor chain
> + */
> +static void virtqueue_get_head(const struct vu_virtq *vq,
> + unsigned int idx, unsigned int *head)
> +{
> + /* Grab the next descriptor number they're advertising, and increment
> + * the index we've seen.
> + */
> + *head = vring_avail_ring(vq, idx % vq->vring.num);
> +
> + /* If their number is silly, that's a fatal mistake. */
> + if (*head >= vq->vring.num)
> + vu_panic("Guest says index %u is available", *head);
> +}
> +
> +/**
> + * virtqueue_read_indirect_desc() - Copy virtio ring descriptors from guest
> + * memory
> + * @dev: Vhost-user device
> + * @desc: Destination address to copy the descriptors
> + * @addr: Guest memory address to copy from
> + * @len: Length of memory to copy
> + *
> + * Return: -1 if there is an error, 0 otherwise
> + */
> +static int virtqueue_read_indirect_desc(struct vu_dev *dev, struct vring_desc *desc,
> + uint64_t addr, size_t len)
> +{
> + uint64_t read_len;
> +
> + if (len > (VIRTQUEUE_MAX_SIZE * sizeof(struct vring_desc)))
> + return -1;
> +
> + if (len == 0)
> + return -1;
> +
> + while (len) {
> + const struct vring_desc *ori_desc;
> +
> + read_len = len;
> + ori_desc = vu_gpa_to_va(dev, &read_len, addr);
> + if (!ori_desc)
> + return -1;
> +
> + memcpy(desc, ori_desc, read_len);
> + len -= read_len;
> + addr += read_len;
> + desc += read_len / sizeof(struct vring_desc);
> + }
> +
> + return 0;
> +}
> +
> +/**
> + * enum virtqueue_read_desc_state - State in the descriptor chain
> + * @VIRTQUEUE_READ_DESC_ERROR Found an invalid descriptor
> + * @VIRTQUEUE_READ_DESC_DONE No more descriptor in the chain
Nit: grammar, "No more descriptors in the chain"
> + * @VIRTQUEUE_READ_DESC_MORE there is more descriptors in the chain
Nit: grammar, "there are" rather than "there is"
> + */
> +enum virtqueue_read_desc_state {
> + VIRTQUEUE_READ_DESC_ERROR = -1,
> + VIRTQUEUE_READ_DESC_DONE = 0, /* end of chain */
> + VIRTQUEUE_READ_DESC_MORE = 1, /* more buffers in chain */
> +};
> +
> +/**
> + * virtqueue_read_next_desc() - Read the the next descriptor in the chain
> + * @desc: Virtio ring descriptors
> + * @i: Index of the current descriptor
> + * @max: Maximum value of the descriptor index
> + * @next: Index of the next descriptor in the chain (output value)
> + *
> + * Return: current chain descriptor state (error, next, done)
> + */
> +static int virtqueue_read_next_desc(const struct vring_desc *desc,
> + int i, unsigned int max, unsigned int *next)
> +{
> + /* If this descriptor says it doesn't chain, we're done. */
> + if (!(le16toh(desc[i].flags) & VRING_DESC_F_NEXT))
> + return VIRTQUEUE_READ_DESC_DONE;
> +
> + /* Check they're not leading us off end of descriptors. */
> + *next = le16toh(desc[i].next);
> + /* Make sure compiler knows to grab that: we don't want it changing! */
> + smp_wmb();
> +
> + if (*next >= max)
> + return VIRTQUEUE_READ_DESC_ERROR;
> +
> + return VIRTQUEUE_READ_DESC_MORE;
> +}
> +
> +/**
> + * vu_queue_empty() - Check if virtqueue is empty
> + * @vq: Virtqueue
> + *
> + * Return: true if the virtqueue is empty, false otherwise
> + */
> +bool vu_queue_empty(struct vu_virtq *vq)
> +{
> + if (!vq->vring.avail)
> + return true;
> +
> + if (vq->shadow_avail_idx != vq->last_avail_idx)
> + return false;
> +
> + return vring_avail_idx(vq) == vq->last_avail_idx;
> +}
> +
> +/**
> + * vring_notify() - Check if a notification can be sent
> + * @dev: Vhost-user device
> + * @vq: Virtqueue
> + *
> + * Return: true if notification can be sent
> + */
Maybe call this vring_can_notify() or something, since it doesn't
actually do the notification.
> +static bool vring_notify(const struct vu_dev *dev, struct vu_virtq *vq)
> +{
> + uint16_t old, new;
> + bool v;
> +
> + /* We need to expose used array entries before checking used event. */
> + smp_mb();
> +
> + /* Always notify when queue is empty (when feature acknowledge) */
> + if (vu_has_feature(dev, VIRTIO_F_NOTIFY_ON_EMPTY) &&
> + !vq->inuse && vu_queue_empty(vq)) {
> + return true;
> + }
> +
> + if (!vu_has_feature(dev, VIRTIO_RING_F_EVENT_IDX))
> + return !(vring_avail_flags(vq) & VRING_AVAIL_F_NO_INTERRUPT);
> +
> + v = vq->signalled_used_valid;
> + vq->signalled_used_valid = true;
> + old = vq->signalled_used;
> + new = vq->signalled_used = vq->used_idx;
> + return !v || vring_need_event(vring_get_used_event(vq), new, old);
> +}
> +
> +/**
> + * vu_queue_notify() - Send a notification the given virtqueue
> + * @dev: Vhost-user device
> + * @vq: Virtqueue
> + */
> +/* cppcheck-suppress unusedFunction */
> +void vu_queue_notify(const struct vu_dev *dev, struct vu_virtq *vq)
> +{
> + if (!vq->vring.avail)
> + return;
> +
> + if (!vring_notify(dev, vq)) {
> + debug("skipped notify...");
Maybe give a bit more context in this message (like the fact that it's
vhost-user related).
> + return;
> + }
> +
> + if (eventfd_write(vq->call_fd, 1) < 0)
> + vu_panic("Error writing eventfd: %s", strerror(errno));
> +}
> +
> +/**
> + * vring_set_avail_event() - Set avail_event
> + * @vq: Virtqueue
> + * @val: Value to set to avail_event
> + * avail_event is used in the same way the used_event is in the
> + * avail_ring.
> + * struct virtq_used {
> + * le16 flags;
> + * le16 idx;
> + * struct virtq_used_elem ringnum];
> + * le16 avail_event; // Only if VIRTIO_F_EVENT_IDX
> + * };
> + * avail_event is used to advise the driver that notifications
> + * are unnecessary until the driver writes entry with an index
> + * specified by avail_event into the available ring.
> + */
> +static inline void vring_set_avail_event(struct vu_virtq *vq, uint16_t val)
> +{
> + uint16_t val_le = htole16(val);
> +
> + if (!vq->notification)
> + return;
> +
> + memcpy(&vq->vring.used->ring[vq->vring.num], &val_le, sizeof(uint16_t));
sizeof(val_le) would be preferred here.
> +}
> +
> +/**
> + * virtqueue_map_desc() - Translate descriptor ring physical address into our
> + * virtual address space
> + * @dev: Vhost-user device
> + * @p_num_sg: First iov entry to use (input),
> + * first iov entry not sued (output)
s/sued/used/?
> + * @iov: Iov array to use to store buffer virtual addresses
> + * @max_num_sg: Maximum number of iov entries
> + * @pa: Guest physical address of the buffer to map into our virtual
> + * address
> + * @sz: Size of the buffer
> + *
> + * Return: false on error, true otherwise
> + */
> +static bool virtqueue_map_desc(struct vu_dev *dev,
> + unsigned int *p_num_sg, struct iovec *iov,
> + unsigned int max_num_sg,
> + uint64_t pa, size_t sz)
> +{
> + unsigned int num_sg = *p_num_sg;
> +
> + ASSERT(num_sg <= max_num_sg);
Shouldn't this be strictly <? Otherwise we'll panic on the first
iteration, won't we?
> + if (!sz)
> + vu_panic("virtio: zero sized buffers are not allowed");
IIUC this indicates a bug in the caller, so just ASSERT(sz) would be
appropriate.
> +
> + while (sz) {
> + uint64_t len = sz;
> +
> + if (num_sg == max_num_sg)
> + vu_panic("virtio: too many descriptors in indirect table");
> +
> + iov[num_sg].iov_base = vu_gpa_to_va(dev, &len, pa);
> + if (iov[num_sg].iov_base == NULL)
> + vu_panic("virtio: invalid address for buffers");
This could also be an ASSERT(), I think.
> + iov[num_sg].iov_len = len;
> + num_sg++;
> + sz -= len;
> + pa += len;
> + }
> +
> + *p_num_sg = num_sg;
> + return true;
> +}
> +
> +/**
> + * vu_queue_map_desc - Map the virqueue descriptor ring into our virtual
> + * address space
> + * @dev: Vhost-user device
> + * @vq: Virtqueue
> + * @idx: First descriptor ring entry to map
> + * @elem: Virtqueue element to store descriptor ring iov
> + *
> + * Return: -1 if there is an error, 0 otherwise
> + */
> +static int vu_queue_map_desc(struct vu_dev *dev, struct vu_virtq *vq, unsigned int idx,
> + struct vu_virtq_element *elem)
> +{
> + const struct vring_desc *desc = vq->vring.desc;
> + struct vring_desc desc_buf[VIRTQUEUE_MAX_SIZE];
> + unsigned int out_num = 0, in_num = 0;
> + unsigned int max = vq->vring.num;
> + unsigned int i = idx;
> + uint64_t read_len;
> + int rc;
> +
> + if (le16toh(desc[i].flags) & VRING_DESC_F_INDIRECT) {
> + unsigned int desc_len;
> + uint64_t desc_addr;
> +
> + if (le32toh(desc[i].len) % sizeof(struct vring_desc))
> + vu_panic("Invalid size for indirect buffer table");
> +
> + /* loop over the indirect descriptor table */
> + desc_addr = le64toh(desc[i].addr);
> + desc_len = le32toh(desc[i].len);
> + max = desc_len / sizeof(struct vring_desc);
> + read_len = desc_len;
> + desc = vu_gpa_to_va(dev, &read_len, desc_addr);
> + if (desc && read_len != desc_len) {
> + /* Failed to use zero copy */
> + desc = NULL;
> + if (!virtqueue_read_indirect_desc(dev, desc_buf, desc_addr, desc_len))
> + desc = desc_buf;
> + }
> + if (!desc)
> + vu_panic("Invalid indirect buffer table");
> + i = 0;
> + }
> +
> + /* Collect all the descriptors */
> + do {
> + if (le16toh(desc[i].flags) & VRING_DESC_F_WRITE) {
> + if (!virtqueue_map_desc(dev, &in_num, elem->in_sg,
> + elem->in_num,
> + le64toh(desc[i].addr),
> + le32toh(desc[i].len))) {
> + return -1;
> + }
> + } else {
> + if (in_num)
> + vu_panic("Incorrect order for descriptors");
> + if (!virtqueue_map_desc(dev, &out_num, elem->out_sg,
> + elem->out_num,
> + le64toh(desc[i].addr),
> + le32toh(desc[i].len))) {
> + return -1;
> + }
> + }
> +
> + /* If we've got too many, that implies a descriptor loop. */
> + if ((in_num + out_num) > max)
> + vu_panic("Looped descriptor");
> + rc = virtqueue_read_next_desc(desc, i, max, &i);
> + } while (rc == VIRTQUEUE_READ_DESC_MORE);
> +
> + if (rc == VIRTQUEUE_READ_DESC_ERROR)
> + vu_panic("read descriptor error");
> +
> + elem->index = idx;
> + elem->in_num = in_num;
> + elem->out_num = out_num;
> +
> + return 0;
> +}
> +
> +/**
> + * vu_queue_pop() - Pop an entry from the virtqueue
> + * @dev: Vhost-user device
> + * @vq: Virtqueue
> + * @elem: Virtqueue element to file with the entry information
> + *
> + * Return: -1 if there is an error, 0 otherwise
> + */
> +/* cppcheck-suppress unusedFunction */
> +int vu_queue_pop(struct vu_dev *dev, struct vu_virtq *vq, struct vu_virtq_element *elem)
> +{
> + unsigned int head;
> + int ret;
> +
> + if (!vq->vring.avail)
> + return -1;
> +
> + if (vu_queue_empty(vq))
> + return -1;
> +
> + /*
> + * Needed after vu_queue_empty(), see comment in
> + * virtqueue_num_heads().
> + */
> + smp_rmb();
> +
> + if (vq->inuse >= vq->vring.num)
> + vu_panic("Virtqueue size exceeded");
> +
> + virtqueue_get_head(vq, vq->last_avail_idx++, &head);
> +
> + if (vu_has_feature(dev, VIRTIO_RING_F_EVENT_IDX))
> + vring_set_avail_event(vq, vq->last_avail_idx);
> +
> + ret = vu_queue_map_desc(dev, vq, head, elem);
> +
> + if (ret < 0)
> + return ret;
> +
> + vq->inuse++;
> +
> + return 0;
> +}
> +
> +/**
> + * vu_queue_detach_element() - Detach an element from the virqueue
> + * @dev: Vhost-user device
> + * @vq: Virtqueue
> + * @index: Index of the element to detach
> + * @len: Size of the element to detach
> + */
> +void vu_queue_detach_element(struct vu_dev *dev, struct vu_virtq *vq,
> + unsigned int index, size_t len)
> +{
> + (void)dev;
> + (void)index;
> + (void)len;
AFAICT this isn't used as a function pointer, so why include the
unused parameter?
> +
> + vq->inuse--;
> + /* unmap, when DMA support is added */
> +}
> +
> +/**
> + * vu_queue_unpop() - Push back a previously popped element from the virqueue
> + * @dev: Vhost-user device
> + * @vq: Virtqueue
> + * @index: Index of the element to unpop
> + * @len: Size of the element to unpop
> + */
> +/* cppcheck-suppress unusedFunction */
> +void vu_queue_unpop(struct vu_dev *dev, struct vu_virtq *vq, unsigned int index, size_t len)
> +{
> + vq->last_avail_idx--;
> + vu_queue_detach_element(dev, vq, index, len);
> +}
> +
> +/**
> + * vu_queue_rewind() - Push back a given number of popped elements
> + * @dev: Vhost-user device
> + * @vq: Virtqueue
> + * @num: Number of element to unpop
> + */
> +/* cppcheck-suppress unusedFunction */
> +bool vu_queue_rewind(struct vu_dev *dev, struct vu_virtq *vq, unsigned int num)
> +{
> + (void)dev;
Unused parameter again.
> + if (num > vq->inuse)
> + return false;
> +
> + vq->last_avail_idx -= num;
> + vq->inuse -= num;
> + return true;
> +}
> +
> +/**
> + * vring_used_write() - Write an entry in the used ring
> + * @vq: Virtqueue
> + * @uelem: Entry to write
> + * @i: Index of the entry in the used ring
> + */
> +static inline void vring_used_write(struct vu_virtq *vq,
> + const struct vring_used_elem *uelem, int i)
> +{
> + struct vring_used *used = vq->vring.used;
> +
> + used->ring[i] = *uelem;
> +}
> +
> +/**
> + * vu_queue_fill_by_index() - Update information of a descriptor ring entry
> + * in the used ring
> + * @vq: Virtqueue
> + * @index: Descriptor ring index
> + * @len: Size of the element
> + * @idx: Used ring entry index
> + */
> +void vu_queue_fill_by_index(struct vu_virtq *vq, unsigned int index,
> + unsigned int len, unsigned int idx)
> +{
> + struct vring_used_elem uelem;
> +
> + if (!vq->vring.avail)
> + return;
> +
> + idx = (idx + vq->used_idx) % vq->vring.num;
> +
> + uelem.id = htole32(index);
> + uelem.len = htole32(len);
> + vring_used_write(vq, &uelem, idx);
> +}
> +
> +/**
> + * vu_queue_fill() - Update information of a given element in the used ring
> + * @dev: Vhost-user device
> + * @vq: Virtqueue
> + * @elem: Element information to fill
> + * @len: Size of the element
> + * @idx: Used ring entry index
> + */
> +/* cppcheck-suppress unusedFunction */
> +void vu_queue_fill(struct vu_virtq *vq, const struct vu_virtq_element *elem,
> + unsigned int len, unsigned int idx)
> +{
> + vu_queue_fill_by_index(vq, elem->index, len, idx);
> +}
> +
> +/**
> + * vring_used_idx_set() - Set the descriptor ring current index
> + * @vq: Virtqueue
> + * @val: Value to set in the index
> + */
> +static inline void vring_used_idx_set(struct vu_virtq *vq, uint16_t val)
> +{
> + vq->vring.used->idx = htole16(val);
> +
> + vq->used_idx = val;
> +}
> +
> +/**
> + * vu_queue_flush() - Flush the virtqueue
> + * @vq: Virtqueue
> + * @count: Number of entry to flush
> + */
> +/* cppcheck-suppress unusedFunction */
> +void vu_queue_flush(struct vu_virtq *vq, unsigned int count)
> +{
> + uint16_t old, new;
> +
> + if (!vq->vring.avail)
> + return;
> +
> + /* Make sure buffer is written before we update index. */
> + smp_wmb();
> +
> + old = vq->used_idx;
> + new = old + count;
> + vring_used_idx_set(vq, new);
> + vq->inuse -= count;
> + if ((int16_t)(new - vq->signalled_used) < (uint16_t)(new - old))
This seems really weird: explicitly casting two sides of a comparison
to different signedness. Is that an error or is there some subtle
logic to it?
> + vq->signalled_used_valid = false;
> +}
> diff --git a/virtio.h b/virtio.h
> new file mode 100644
> index 000000000000..0a2cf6230139
> --- /dev/null
> +++ b/virtio.h
> @@ -0,0 +1,190 @@
> +/* SPDX-License-Identifier: GPL-2.0-or-later
> + * Copyright Red Hat
> + * Author: Laurent Vivier <lvivier@redhat.com>
> + *
> + * virtio API, vring and virtqueue functions definition
> + */
> +
> +#ifndef VIRTIO_H
> +#define VIRTIO_H
> +
> +#include <stdbool.h>
> +#include <linux/vhost_types.h>
> +
> +#define vu_panic(...) die( __VA_ARGS__ )
> +
> +/* Maximum size of a virtqueue */
> +#define VIRTQUEUE_MAX_SIZE 1024
> +
> +/**
> + * struct vu_ring - Virtqueue rings
> + * @num: Size of the queue
> + * @desc: Descriptor ring
> + * @avail: Available ring
> + * @used: Used ring
> + * @log_guest_addr: Guest address for logging
> + * @flags: Vring flags
> + * VHOST_VRING_F_LOG is set if log address is valid
> + */
> +struct vu_ring {
> + unsigned int num;
> + struct vring_desc *desc;
> + struct vring_avail *avail;
> + struct vring_used *used;
> + uint64_t log_guest_addr;
> + uint32_t flags;
> +};
> +
> +/**
> + * struct vu_virtq - Virtqueue definition
> + * @vring: Virtqueue rings
> + * @last_avail_idx: Next head to pop
> + * @shadow_avail_idx: Last avail_idx read from VQ.
> + * @used_idx: Descriptor ring current index
> + * @signalled_used: Last used index value we have signalled on
> + * @signalled_used_valid: True if signalled_used if valid
> + * @notification: True if the queues notify (via event
> + * index or interrupt)
> + * @inuse: Number of entries in use
> + * @call_fd: The event file descriptor to signal when
> + * buffers are used.
> + * @kick_fd: The event file descriptor for adding
> + * buffers to the vring
> + * @err_fd: The event file descriptor to signal when
> + * error occurs
> + * @enable: True if the virtqueue is enabled
> + * @started: True if the virtqueue is started
> + * @vra: QEMU address of our rings
> + */
> +struct vu_virtq {
> + struct vu_ring vring;
> + uint16_t last_avail_idx;
> + uint16_t shadow_avail_idx;
> + uint16_t used_idx;
> + uint16_t signalled_used;
> + bool signalled_used_valid;
> + bool notification;
> + unsigned int inuse;
> + int call_fd;
> + int kick_fd;
> + int err_fd;
> + unsigned int enable;
> + bool started;
> + struct vhost_vring_addr vra;
> +};
> +
> +/**
> + * struct vu_dev_region - guest shared memory region
> + * @gpa: Guest physical address of the region
> + * @size: Memory size in bytes
> + * @qva: QEMU virtual address
Is this actually the qemu virtual address? Or is it our virtual
address?
> + * @mmap_offset: Offset where the region starts in the mapped memory
> + * @mmap_addr: Address of the mapped memory
> + */
> +struct vu_dev_region {
> + uint64_t gpa;
> + uint64_t size;
> + uint64_t qva;
> + uint64_t mmap_offset;
> + uint64_t mmap_addr;
> +};
> +
> +#define VHOST_USER_MAX_QUEUES 2
> +
> +/*
> + * Set a reasonable maximum number of ram slots, which will be supported by
> + * any architecture.
> + */
> +#define VHOST_USER_MAX_RAM_SLOTS 32
> +
> +/**
> + * struct vu_dev
> + * @context: Execution context
This looks like a copypasta error.
> + * nregions: Number of shared memory regions
Missing '@'
> + * @regions: Guest shared memory regions
> + * @features: Vhost-user features
> + * @protocol_features: Vhost-user protocol features
> + * @hdrlen: Virtio -net header length
> + */
> +struct vu_dev {
> + uint32_t nregions;
> + struct vu_dev_region regions[VHOST_USER_MAX_RAM_SLOTS];
> + struct vu_virtq vq[VHOST_USER_MAX_QUEUES];
> + uint64_t features;
> + uint64_t protocol_features;
> + int hdrlen;
> +};
> +
> +/**
> + * struct vu_virtq_element
> + * @index: Descriptor ring index
> + * @out_num: Number of outgoing iovec buffers
> + * @in_num: Number of incoming iovec buffers
> + * @in_sg: Incoming iovec buffers
> + * @out_sg: Outgoing iovec buffers
> + */
> +struct vu_virtq_element {
> + unsigned int index;
> + unsigned int out_num;
> + unsigned int in_num;
> + struct iovec *in_sg;
> + struct iovec *out_sg;
> +};
> +
> +/**
> + * has_feature() - Check a feature bit in a features set
> + * @features: Features set
> + * @fb: Feature bit to check
> + *
> + * Return: True if the feature bit is set
> + */
> +static inline bool has_feature(uint64_t features, unsigned int fbit)
> +{
> + return !!(features & (1ULL << fbit));
> +}
> +
> +/**
> + * vu_has_feature() - Check if a virtio-net feature is available
> + * @vdev: Vhost-user device
> + * @bit: Feature to check
> + *
> + * Return: True if the feature is available
> + */
> +static inline bool vu_has_feature(const struct vu_dev *vdev,
> + unsigned int fbit)
> +{
> + return has_feature(vdev->features, fbit);
> +}
> +
> +/**
> + * vu_has_protocol_feature() - Check if a vhost-user feature is available
> + * @vdev: Vhost-user device
> + * @bit: Feature to check
> + *
> + * Return: True if the feature is available
> + */
> +/* cppcheck-suppress unusedFunction */
> +static inline bool vu_has_protocol_feature(const struct vu_dev *vdev,
> + unsigned int fbit)
> +{
> + return has_feature(vdev->protocol_features, fbit);
> +}
> +
> +bool vu_queue_empty(struct vu_virtq *vq);
> +void vu_queue_notify(const struct vu_dev *dev, struct vu_virtq *vq);
> +int vu_queue_pop(struct vu_dev *dev, struct vu_virtq *vq,
> + struct vu_virtq_element *elem);
> +void vu_queue_detach_element(struct vu_dev *dev, struct vu_virtq *vq,
> + unsigned int index, size_t len);
> +void vu_queue_unpop(struct vu_dev *dev, struct vu_virtq *vq,
> + unsigned int index, size_t len);
> +bool vu_queue_rewind(struct vu_dev *dev, struct vu_virtq *vq,
> + unsigned int num);
> +
> +void vu_queue_fill_by_index(struct vu_virtq *vq, unsigned int index,
> + unsigned int len, unsigned int idx);
> +void vu_queue_fill(struct vu_virtq *vq,
> + const struct vu_virtq_element *elem, unsigned int len,
> + unsigned int idx);
> +void vu_queue_flush(struct vu_virtq *vq, unsigned int count);
> +#endif /* VIRTIO_H */
--
David Gibson (he or they) | I'll have my music baroque, and my code
david AT gibson.dropbear.id.au | minimalist, thank you, not the other way
| around.
http://www.ozlabs.org/~dgibson
[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]
next prev parent reply other threads:[~2024-07-17 5:21 UTC|newest]
Thread overview: 13+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-07-12 15:32 [PATCH v2 0/4] Add vhost-user support to passt. (part 3) Laurent Vivier
2024-07-12 15:32 ` [PATCH v2 1/4] packet: replace struct desc by struct iovec Laurent Vivier
2024-07-15 4:59 ` David Gibson
2024-07-19 21:28 ` Stefano Brivio
2024-07-12 15:32 ` [PATCH v2 2/4] vhost-user: introduce virtio API Laurent Vivier
2024-07-17 5:21 ` David Gibson [this message]
2024-08-14 12:47 ` Laurent Vivier
2024-08-15 4:52 ` David Gibson
2024-07-19 21:29 ` Stefano Brivio
2024-07-12 15:32 ` [PATCH v2 3/4] vhost-user: introduce vhost-user API Laurent Vivier
2024-07-19 21:29 ` Stefano Brivio
2024-08-14 14:44 ` Laurent Vivier
2024-07-12 15:32 ` [PATCH v2 4/4] vhost-user: add vhost-user Laurent Vivier
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=ZpdU3HWHOe3YPWkZ@zatzit \
--to=david@gibson.dropbear.id.au \
--cc=lvivier@redhat.com \
--cc=passt-dev@passt.top \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
Code repositories for project(s) associated with this public inbox
https://passt.top/passt
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for IMAP folder(s).