/* SPDX-License-Identifier: GPL-2.0-or-later * Copyright Red Hat * Author: Laurent Vivier * * virtio API, vring and virtqueue functions definition */ /* some parts copied from QEMU subprojects/libvhost-user/libvhost-user.c */ #include #include #include #include #include #include #include "util.h" #include "virtio.h" #define VIRTQUEUE_MAX_SIZE 1024 /** * vu_gpa_to_va() - Translate guest physical address to our virtual address. * @dev: Vhost-user device * @plen: Physical length to map (input), virtual address mapped (output) * @guest_addr: Guest physical address * * Return: virtual address in our address space of the guest physical address */ static void *vu_gpa_to_va(struct vu_dev *dev, uint64_t *plen, uint64_t guest_addr) { unsigned int i; if (*plen == 0) return NULL; /* Find matching memory region. */ for (i = 0; i < dev->nregions; i++) { const struct vu_dev_region *r = &dev->regions[i]; if ((guest_addr >= r->gpa) && (guest_addr < (r->gpa + r->size))) { if ((guest_addr + *plen) > (r->gpa + r->size)) *plen = r->gpa + r->size - guest_addr; /* NOLINTNEXTLINE(performance-no-int-to-ptr) */ return (void *)(guest_addr - r->gpa + r->mmap_addr + r->mmap_offset); } } return NULL; } /** * vring_avail_flags() - Read the available ring flags * @vq: Virtqueue * * Return: the available ring descriptor flags of the given virtqueue */ static inline uint16_t vring_avail_flags(const struct vu_virtq *vq) { return le16toh(vq->vring.avail->flags); } /** * vring_avail_idx() - Read the available ring index * @vq: Virtqueue * * Return: the available ring index of the given virtqueue */ static inline uint16_t vring_avail_idx(struct vu_virtq *vq) { vq->shadow_avail_idx = le16toh(vq->vring.avail->idx); return vq->shadow_avail_idx; } /** * vring_avail_ring() - Read an available ring entry * @vq: Virtqueue * @i Index of the entry to read * * Return: the ring entry content (head of the descriptor chain) */ static inline uint16_t vring_avail_ring(const struct vu_virtq *vq, int i) { return le16toh(vq->vring.avail->ring[i]); } /** * vring_get_used_event() - Get the used event from the available ring * @vq Virtqueue * * Return: the used event (available only if VIRTIO_RING_F_EVENT_IDX is set) * used_event is a performant alternative where the driver * specifies how far the device can progress before a notification * is required. In this case, virq_avail is defined as: * struct virtq_avail { * le16 flags; * le16 idx; * le16 ring[num]; * le16 used_event; // Only if VIRTIO_F_EVENT_IDX * }; * If the idx field in the used ring (which determined where that * descriptor index was placed) was equal to used_event, the device * must send a notification. * Otherwise the device should not send a notification. */ static inline uint16_t vring_get_used_event(const struct vu_virtq *vq) { return vring_avail_ring(vq, vq->vring.num); } /** * virtqueue_get_head() - Get the head of the descriptor chain for a given * index * @vq: Virtqueue * @idx: Available ring entry index * @head: Head of the descriptor chain */ static void virtqueue_get_head(const struct vu_virtq *vq, unsigned int idx, unsigned int *head) { /* Grab the next descriptor number they're advertising, and increment * the index we've seen. */ *head = vring_avail_ring(vq, idx % vq->vring.num); /* If their number is silly, that's a fatal mistake. */ if (*head >= vq->vring.num) vu_panic("Guest says index %u is available", *head); } /** * virtqueue_read_indirect_desc() - Copy virtio ring descriptors from guest * memory * @dev: Vhost-user device * @desc: Destination address to copy the descriptors * @addr: Guest memory address to copy from * @len: Length of memory to copy * * Return: -1 if there is an error, 0 otherwise */ static int virtqueue_read_indirect_desc(struct vu_dev *dev, struct vring_desc *desc, uint64_t addr, size_t len) { uint64_t read_len; if (len > (VIRTQUEUE_MAX_SIZE * sizeof(struct vring_desc))) return -1; if (len == 0) return -1; while (len) { const struct vring_desc *ori_desc; read_len = len; ori_desc = vu_gpa_to_va(dev, &read_len, addr); if (!ori_desc) return -1; memcpy(desc, ori_desc, read_len); len -= read_len; addr += read_len; desc += read_len / sizeof(struct vring_desc); } return 0; } /** * enum virtqueue_read_desc_state - State in the descriptor chain * @VIRTQUEUE_READ_DESC_ERROR Found an invalid descriptor * @VIRTQUEUE_READ_DESC_DONE No more descriptor in the chain * @VIRTQUEUE_READ_DESC_MORE there is more descriptors in the chain */ enum virtqueue_read_desc_state { VIRTQUEUE_READ_DESC_ERROR = -1, VIRTQUEUE_READ_DESC_DONE = 0, /* end of chain */ VIRTQUEUE_READ_DESC_MORE = 1, /* more buffers in chain */ }; /** * virtqueue_read_next_desc() - Read the the next descriptor in the chain * @desc: Virtio ring descriptors * @i: Index of the current descriptor * @max: Maximum value of the descriptor index * @next: Index of the next descriptor in the chain (output value) * * Return: current chain descriptor state (error, next, done) */ static int virtqueue_read_next_desc(const struct vring_desc *desc, int i, unsigned int max, unsigned int *next) { /* If this descriptor says it doesn't chain, we're done. */ if (!(le16toh(desc[i].flags) & VRING_DESC_F_NEXT)) return VIRTQUEUE_READ_DESC_DONE; /* Check they're not leading us off end of descriptors. */ *next = le16toh(desc[i].next); /* Make sure compiler knows to grab that: we don't want it changing! */ smp_wmb(); if (*next >= max) return VIRTQUEUE_READ_DESC_ERROR; return VIRTQUEUE_READ_DESC_MORE; } /** * vu_queue_empty() - Check if virtqueue is empty * @vq: Virtqueue * * Return: true if the virtqueue is empty, false otherwise */ bool vu_queue_empty(struct vu_virtq *vq) { if (!vq->vring.avail) return true; if (vq->shadow_avail_idx != vq->last_avail_idx) return false; return vring_avail_idx(vq) == vq->last_avail_idx; } /** * vring_notify() - Check if a notification can be sent * @dev: Vhost-user device * @vq: Virtqueue * * Return: true if notification can be sent */ static bool vring_notify(const struct vu_dev *dev, struct vu_virtq *vq) { uint16_t old, new; bool v; /* We need to expose used array entries before checking used event. */ smp_mb(); /* Always notify when queue is empty (when feature acknowledge) */ if (vu_has_feature(dev, VIRTIO_F_NOTIFY_ON_EMPTY) && !vq->inuse && vu_queue_empty(vq)) { return true; } if (!vu_has_feature(dev, VIRTIO_RING_F_EVENT_IDX)) return !(vring_avail_flags(vq) & VRING_AVAIL_F_NO_INTERRUPT); v = vq->signalled_used_valid; vq->signalled_used_valid = true; old = vq->signalled_used; new = vq->signalled_used = vq->used_idx; return !v || vring_need_event(vring_get_used_event(vq), new, old); } /** * vu_queue_notify() - Send a notification the given virtqueue * @dev: Vhost-user device * @vq: Virtqueue */ /* cppcheck-suppress unusedFunction */ void vu_queue_notify(const struct vu_dev *dev, struct vu_virtq *vq) { if (!vq->vring.avail) return; if (!vring_notify(dev, vq)) { debug("skipped notify..."); return; } if (eventfd_write(vq->call_fd, 1) < 0) vu_panic("Error writing eventfd: %s", strerror(errno)); } /** * vring_set_avail_event() - Set avail_event * @vq: Virtqueue * @val: Value to set to avail_event * avail_event is used in the same way the used_event is in the * avail_ring. * struct virtq_used { * le16 flags; * le16 idx; * struct virtq_used_elem ringnum]; * le16 avail_event; // Only if VIRTIO_F_EVENT_IDX * }; * avail_event is used to advise the driver that notifications * are unnecessary until the driver writes entry with an index * specified by avail_event into the available ring. */ static inline void vring_set_avail_event(struct vu_virtq *vq, uint16_t val) { uint16_t val_le = htole16(val); if (!vq->notification) return; memcpy(&vq->vring.used->ring[vq->vring.num], &val_le, sizeof(uint16_t)); } /** * virtqueue_map_desc() - Translate descriptor ring physical address into our * virtual address space * @dev: Vhost-user device * @p_num_sg: First iov entry to use (input), * first iov entry not sued (output) * @iov: Iov array to use to store buffer virtual addresses * @max_num_sg: Maximum number of iov entries * @pa: Guest physical address of the buffer to map into our virtual * address * @sz: Size of the buffer * * Return: false on error, true otherwise */ static bool virtqueue_map_desc(struct vu_dev *dev, unsigned int *p_num_sg, struct iovec *iov, unsigned int max_num_sg, uint64_t pa, size_t sz) { unsigned int num_sg = *p_num_sg; ASSERT(num_sg <= max_num_sg); if (!sz) vu_panic("virtio: zero sized buffers are not allowed"); while (sz) { uint64_t len = sz; if (num_sg == max_num_sg) vu_panic("virtio: too many descriptors in indirect table"); iov[num_sg].iov_base = vu_gpa_to_va(dev, &len, pa); if (iov[num_sg].iov_base == NULL) vu_panic("virtio: invalid address for buffers"); iov[num_sg].iov_len = len; num_sg++; sz -= len; pa += len; } *p_num_sg = num_sg; return true; } /** * vu_queue_map_desc - Map the virqueue descriptor ring into our virtual * address space * @dev: Vhost-user device * @vq: Virtqueue * @idx: First descriptor ring entry to map * @elem: Virtqueue element to store descriptor ring iov * * Return: -1 if there is an error, 0 otherwise */ static int vu_queue_map_desc(struct vu_dev *dev, struct vu_virtq *vq, unsigned int idx, struct vu_virtq_element *elem) { const struct vring_desc *desc = vq->vring.desc; struct vring_desc desc_buf[VIRTQUEUE_MAX_SIZE]; unsigned int out_num = 0, in_num = 0; unsigned int max = vq->vring.num; unsigned int i = idx; uint64_t read_len; int rc; if (le16toh(desc[i].flags) & VRING_DESC_F_INDIRECT) { unsigned int desc_len; uint64_t desc_addr; if (le32toh(desc[i].len) % sizeof(struct vring_desc)) vu_panic("Invalid size for indirect buffer table"); /* loop over the indirect descriptor table */ desc_addr = le64toh(desc[i].addr); desc_len = le32toh(desc[i].len); max = desc_len / sizeof(struct vring_desc); read_len = desc_len; desc = vu_gpa_to_va(dev, &read_len, desc_addr); if (desc && read_len != desc_len) { /* Failed to use zero copy */ desc = NULL; if (!virtqueue_read_indirect_desc(dev, desc_buf, desc_addr, desc_len)) desc = desc_buf; } if (!desc) vu_panic("Invalid indirect buffer table"); i = 0; } /* Collect all the descriptors */ do { if (le16toh(desc[i].flags) & VRING_DESC_F_WRITE) { if (!virtqueue_map_desc(dev, &in_num, elem->in_sg, elem->in_num, le64toh(desc[i].addr), le32toh(desc[i].len))) { return -1; } } else { if (in_num) vu_panic("Incorrect order for descriptors"); if (!virtqueue_map_desc(dev, &out_num, elem->out_sg, elem->out_num, le64toh(desc[i].addr), le32toh(desc[i].len))) { return -1; } } /* If we've got too many, that implies a descriptor loop. */ if ((in_num + out_num) > max) vu_panic("Looped descriptor"); rc = virtqueue_read_next_desc(desc, i, max, &i); } while (rc == VIRTQUEUE_READ_DESC_MORE); if (rc == VIRTQUEUE_READ_DESC_ERROR) vu_panic("read descriptor error"); elem->index = idx; elem->in_num = in_num; elem->out_num = out_num; return 0; } /** * vu_queue_pop() - Pop an entry from the virtqueue * @dev: Vhost-user device * @vq: Virtqueue * @elem: Virtqueue element to file with the entry information * * Return: -1 if there is an error, 0 otherwise */ /* cppcheck-suppress unusedFunction */ int vu_queue_pop(struct vu_dev *dev, struct vu_virtq *vq, struct vu_virtq_element *elem) { unsigned int head; int ret; if (!vq->vring.avail) return -1; if (vu_queue_empty(vq)) return -1; /* * Needed after vu_queue_empty(), see comment in * virtqueue_num_heads(). */ smp_rmb(); if (vq->inuse >= vq->vring.num) vu_panic("Virtqueue size exceeded"); virtqueue_get_head(vq, vq->last_avail_idx++, &head); if (vu_has_feature(dev, VIRTIO_RING_F_EVENT_IDX)) vring_set_avail_event(vq, vq->last_avail_idx); ret = vu_queue_map_desc(dev, vq, head, elem); if (ret < 0) return ret; vq->inuse++; return 0; } /** * vu_queue_detach_element() - Detach an element from the virqueue * @dev: Vhost-user device * @vq: Virtqueue * @index: Index of the element to detach * @len: Size of the element to detach */ void vu_queue_detach_element(struct vu_dev *dev, struct vu_virtq *vq, unsigned int index, size_t len) { (void)dev; (void)index; (void)len; vq->inuse--; /* unmap, when DMA support is added */ } /** * vu_queue_unpop() - Push back a previously popped element from the virqueue * @dev: Vhost-user device * @vq: Virtqueue * @index: Index of the element to unpop * @len: Size of the element to unpop */ /* cppcheck-suppress unusedFunction */ void vu_queue_unpop(struct vu_dev *dev, struct vu_virtq *vq, unsigned int index, size_t len) { vq->last_avail_idx--; vu_queue_detach_element(dev, vq, index, len); } /** * vu_queue_rewind() - Push back a given number of popped elements * @dev: Vhost-user device * @vq: Virtqueue * @num: Number of element to unpop */ /* cppcheck-suppress unusedFunction */ bool vu_queue_rewind(struct vu_dev *dev, struct vu_virtq *vq, unsigned int num) { (void)dev; if (num > vq->inuse) return false; vq->last_avail_idx -= num; vq->inuse -= num; return true; } /** * vring_used_write() - Write an entry in the used ring * @vq: Virtqueue * @uelem: Entry to write * @i: Index of the entry in the used ring */ static inline void vring_used_write(struct vu_virtq *vq, const struct vring_used_elem *uelem, int i) { struct vring_used *used = vq->vring.used; used->ring[i] = *uelem; } /** * vu_queue_fill_by_index() - Update information of a descriptor ring entry * in the used ring * @vq: Virtqueue * @index: Descriptor ring index * @len: Size of the element * @idx: Used ring entry index */ void vu_queue_fill_by_index(struct vu_virtq *vq, unsigned int index, unsigned int len, unsigned int idx) { struct vring_used_elem uelem; if (!vq->vring.avail) return; idx = (idx + vq->used_idx) % vq->vring.num; uelem.id = htole32(index); uelem.len = htole32(len); vring_used_write(vq, &uelem, idx); } /** * vu_queue_fill() - Update information of a given element in the used ring * @dev: Vhost-user device * @vq: Virtqueue * @elem: Element information to fill * @len: Size of the element * @idx: Used ring entry index */ /* cppcheck-suppress unusedFunction */ void vu_queue_fill(struct vu_virtq *vq, const struct vu_virtq_element *elem, unsigned int len, unsigned int idx) { vu_queue_fill_by_index(vq, elem->index, len, idx); } /** * vring_used_idx_set() - Set the descriptor ring current index * @vq: Virtqueue * @val: Value to set in the index */ static inline void vring_used_idx_set(struct vu_virtq *vq, uint16_t val) { vq->vring.used->idx = htole16(val); vq->used_idx = val; } /** * vu_queue_flush() - Flush the virtqueue * @vq: Virtqueue * @count: Number of entry to flush */ /* cppcheck-suppress unusedFunction */ void vu_queue_flush(struct vu_virtq *vq, unsigned int count) { uint16_t old, new; if (!vq->vring.avail) return; /* Make sure buffer is written before we update index. */ smp_wmb(); old = vq->used_idx; new = old + count; vring_used_idx_set(vq, new); vq->inuse -= count; if ((int16_t)(new - vq->signalled_used) < (uint16_t)(new - old)) vq->signalled_used_valid = false; }