From: Stefano Brivio <sbrivio@redhat.com>
To: Laurent Vivier <lvivier@redhat.com>
Cc: passt-dev@passt.top
Subject: Re: [PATCH 5/9] vhost-user: add VHOST_USER_SET_LOG_BASE command
Date: Fri, 17 Jan 2025 19:05:02 +0100 [thread overview]
Message-ID: <20250117190502.6590b489@elisabeth> (raw)
In-Reply-To: <20241219111400.2352110-6-lvivier@redhat.com>
On Thu, 19 Dec 2024 12:13:56 +0100
Laurent Vivier <lvivier@redhat.com> wrote:
> Sets logging shared memory space.
>
> When the back-end has VHOST_USER_PROTOCOL_F_LOG_SHMFD protocol feature,
> the log memory fd is provided in the ancillary data of
> VHOST_USER_SET_LOG_BASE message, the size and offset of shared memory
> area provided in the message.
>
> Signed-off-by: Laurent Vivier <lvivier@redhat.com>
> ---
> util.h | 3 ++
> vhost_user.c | 87 +++++++++++++++++++++++++++++++++++++++++++++++++++-
> vhost_user.h | 3 ++
> virtio.c | 74 ++++++++++++++++++++++++++++++++++++++++++--
> virtio.h | 4 +++
> 5 files changed, 168 insertions(+), 3 deletions(-)
>
> diff --git a/util.h b/util.h
> index 3fa1d12544a0..d02333d5a88d 100644
> --- a/util.h
> +++ b/util.h
> @@ -152,6 +152,9 @@ static inline void barrier(void) { __asm__ __volatile__("" ::: "memory"); }
> #define smp_wmb() smp_mb_release()
> #define smp_rmb() smp_mb_acquire()
>
> +#define qatomic_or(ptr, n) \
> + ((void) __atomic_fetch_or(ptr, n, __ATOMIC_SEQ_CST))
> +
> #define NS_FN_STACK_SIZE (1024 * 1024) /* 1MiB */
>
> int do_clone(int (*fn)(void *), char *stack_area, size_t stack_size, int flags,
> diff --git a/vhost_user.c b/vhost_user.c
> index ce4373d9eeca..c2fac58badf1 100644
> --- a/vhost_user.c
> +++ b/vhost_user.c
> @@ -510,6 +510,12 @@ static bool vu_set_mem_table_exec(struct vu_dev *vdev,
> */
> static void vu_close_log(struct vu_dev *vdev)
> {
> + if (vdev->log_table) {
> + if (munmap(vdev->log_table, vdev->log_size) != 0)
> + die_perror("close log munmap() error");
> + vdev->log_table = NULL;
> + }
> +
> if (vdev->log_call_fd != -1) {
> close(vdev->log_call_fd);
> vdev->log_call_fd = -1;
> @@ -520,7 +526,6 @@ static void vu_close_log(struct vu_dev *vdev)
> * vu_log_kick() - Inform the front-end that the log has been modified
> * @vdev: vhost-user device
> */
> -/* cppcheck-suppress unusedFunction */
> void vu_log_kick(const struct vu_dev *vdev)
> {
> if (vdev->log_call_fd != -1) {
> @@ -532,6 +537,84 @@ void vu_log_kick(const struct vu_dev *vdev)
> }
> }
>
> +
> +/**
Excess newline.
> + * vu_log_page() -- Update logging table
Single '-' between function name and comment.
> + * @log_table: Base address of the logging table
> + * @page: Page number that has been updated
> + */
> +/* NOLINTNEXTLINE(readability-non-const-parameter) */
> +static void vu_log_page(uint8_t *log_table, uint64_t page)
> +{
> + qatomic_or(&log_table[page / 8], 1 << (page % 8));
> +}
> +
> +/**
> + * vu_log_write() -- Log memory write
Single '-' between function name and comment.
> + * @dev: Vhost-user device
vhost-user
> + * @address: Memory address
> + * @length: Memory size
> + */
> +void vu_log_write(const struct vu_dev *vdev, uint64_t address, uint64_t length)
> +{
> + uint64_t page;
> +
> + if (!vdev->log_table || !length ||
> + !vu_has_feature(vdev, VHOST_F_LOG_ALL))
> + return;
> +
> + page = address / VHOST_LOG_PAGE;
> + while (page * VHOST_LOG_PAGE < address + length) {
> + vu_log_page(vdev->log_table, page);
> + page++;
> + }
> + vu_log_kick(vdev);
> +}
> +
> +/**
> + * vu_set_log_base_exec() - Set the memory log base
> + * @vdev: vhost-user device
> + * @vmsg: vhost-user message
> + *
> + * Return: False as no reply is requested
> + *
> + * #syscalls:vu mmap munmap
I wonder: will there be a way around this the day that we want to
disable mmap() for vhost-user mode too?
> + */
> +static bool vu_set_log_base_exec(struct vu_dev *vdev,
> + struct vhost_user_msg *msg)
> +{
> + uint64_t log_mmap_size, log_mmap_offset;
> + void *base;
> + int fd;
> +
> + if (msg->fd_num != 1 || msg->hdr.size != sizeof(msg->payload.log))
> + die("Invalid log_base message");
Maybe prefix this with "vhost-user:", otherwise it's not really clear
where it's coming from.
> +
> + fd = msg->fds[0];
> + log_mmap_offset = msg->payload.log.mmap_offset;
> + log_mmap_size = msg->payload.log.mmap_size;
> +
> + debug("Log mmap_offset: %"PRId64, log_mmap_offset);
> + debug("Log mmap_size: %"PRId64, log_mmap_size);
> +
> + base = mmap(0, log_mmap_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd,
> + log_mmap_offset);
> + close(fd);
> + if (base == MAP_FAILED)
> + die("log mmap error");
Same here.
> +
> + if (vdev->log_table)
> + munmap(vdev->log_table, vdev->log_size);
> +
> + vdev->log_table = base;
> + vdev->log_size = log_mmap_size;
> +
> + msg->hdr.size = sizeof(msg->payload.u64);
> + msg->fd_num = 0;
> +
> + return true;
> +}
> +
> /**
> * vu_set_log_fd_exec() -- Set the eventfd used to report logging update
> * @vdev: vhost-user device
> @@ -915,6 +998,7 @@ void vu_init(struct ctx *c)
> .notification = true,
> };
> }
> + c->vdev->log_table = NULL;
> c->vdev->log_call_fd = -1;
> }
>
> @@ -984,6 +1068,7 @@ static bool (*vu_handle[VHOST_USER_MAX])(struct vu_dev *vdev,
> [VHOST_USER_GET_QUEUE_NUM] = vu_get_queue_num_exec,
> [VHOST_USER_SET_OWNER] = vu_set_owner_exec,
> [VHOST_USER_SET_MEM_TABLE] = vu_set_mem_table_exec,
> + [VHOST_USER_SET_LOG_BASE] = vu_set_log_base_exec,
> [VHOST_USER_SET_LOG_FD] = vu_set_log_fd_exec,
> [VHOST_USER_SET_VRING_NUM] = vu_set_vring_num_exec,
> [VHOST_USER_SET_VRING_ADDR] = vu_set_vring_addr_exec,
> diff --git a/vhost_user.h b/vhost_user.h
> index 2fc0342ff5ba..22a5d059073f 100644
> --- a/vhost_user.h
> +++ b/vhost_user.h
> @@ -15,6 +15,7 @@
> #include "iov.h"
>
> #define VHOST_USER_F_PROTOCOL_FEATURES 30
> +#define VHOST_LOG_PAGE 4096
Does this need to be 65536 on ppc64 and ppc64le? If so, we have
PAGE_SIZE exported by the Makefile (it uses 'getconf', so it's not
cross-build-safe; we should find a better way eventually).
>
> #define VHOST_MEMORY_BASELINE_NREGIONS 8
>
> @@ -241,5 +242,7 @@ void vu_print_capabilities(void);
> void vu_init(struct ctx *c);
> void vu_cleanup(struct vu_dev *vdev);
> void vu_log_kick(const struct vu_dev *vdev);
> +void vu_log_write(const struct vu_dev *vdev, uint64_t address,
> + uint64_t length);
> void vu_control_handler(struct vu_dev *vdev, int fd, uint32_t events);
> #endif /* VHOST_USER_H */
> diff --git a/virtio.c b/virtio.c
> index 52d5a4d4be52..13838586ad1a 100644
> --- a/virtio.c
> +++ b/virtio.c
> @@ -81,6 +81,7 @@
>
> #include "util.h"
> #include "virtio.h"
> +#include "vhost_user.h"
>
> #define VIRTQUEUE_MAX_SIZE 1024
>
> @@ -592,7 +593,72 @@ static inline void vring_used_write(const struct vu_dev *vdev,
> struct vring_used *used = vq->vring.used;
>
> used->ring[i] = *uelem;
> - (void)vdev;
> + vu_log_write(vdev, vq->vring.log_guest_addr +
> + offsetof(struct vring_used, ring[i]),
> + sizeof(used->ring[i]));
> +}
> +
> +/**
> + * vu_log_queue_fill() -- Log virtqueue memory update
Single '-' between function name and comment.
> + * @dev: Vhost-user device
vhost-user
> + * @vq: Virtqueue
> + * @index: Descriptor ring index
> + * @len: Size of the element
> + */
> +static void vu_log_queue_fill(const struct vu_dev *vdev, struct vu_virtq *vq,
> + unsigned int index, unsigned int len)
> +{
> + struct vring_desc desc_buf[VIRTQUEUE_MAX_SIZE];
> + struct vring_desc *desc = vq->vring.desc;
> + unsigned int max, min;
> + unsigned num_bufs = 0;
> + uint64_t read_len;
> +
> + if (!vdev->log_table || !len || !vu_has_feature(vdev, VHOST_F_LOG_ALL))
> + return;
> +
> + max = vq->vring.num;
> +
> + if (le16toh(desc[index].flags) & VRING_DESC_F_INDIRECT) {
> + unsigned int desc_len;
> + uint64_t desc_addr;
> +
> + if (le32toh(desc[index].len) % sizeof(struct vring_desc))
> + die("Invalid size for indirect buffer table");
> +
> + /* loop over the indirect descriptor table */
> + desc_addr = le64toh(desc[index].addr);
> + desc_len = le32toh(desc[index].len);
> + max = desc_len / sizeof(struct vring_desc);
> + read_len = desc_len;
> + desc = vu_gpa_to_va(vdev, &read_len, desc_addr);
> + if (desc && read_len != desc_len) {
> + /* Failed to use zero copy */
Follow-up on the question above: could we skip mmap() if we used only
this path?
> + desc = NULL;
> + if (!virtqueue_read_indirect_desc(vdev, desc_buf,
> + desc_addr,
> + desc_len))
> + desc = desc_buf;
> + }
> +
> + if (!desc)
> + die("Invalid indirect buffer table");
> +
> + index = 0;
> + }
> +
> + do {
> + if (++num_bufs > max)
> + die("Looped descriptor");
> +
> + if (le16toh(desc[index].flags) & VRING_DESC_F_WRITE) {
> + min = MIN(le32toh(desc[index].len), len);
> + vu_log_write(vdev, le64toh(desc[index].addr), min);
> + len -= min;
> + }
> + } while (len > 0 &&
> + (virtqueue_read_next_desc(desc, index, max, &index) ==
> + VIRTQUEUE_READ_DESC_MORE));
It's a bit weird that we could get a negative length because of the
do { } while. That is:
while (len > 0) {
...
if (virtqueue_read_next_desc(desc, index, max, &index) !=
VIRTQUEUE_READ_DESC_MORE)
break;
}
would have looked more natural/safer to me. But perhaps there's a reason
for that.
> }
>
>
> @@ -614,6 +680,8 @@ void vu_queue_fill_by_index(const struct vu_dev *vdev, struct vu_virtq *vq,
> if (!vq->vring.avail)
> return;
>
> + vu_log_queue_fill(vdev, vq, index, len);
> +
> idx = (idx + vq->used_idx) % vq->vring.num;
>
> uelem.id = htole32(index);
> @@ -646,7 +714,9 @@ static inline void vring_used_idx_set(const struct vu_dev *vdev,
> struct vu_virtq *vq, uint16_t val)
> {
> vq->vring.used->idx = htole16(val);
> - (void)vdev;
> + vu_log_write(vdev, vq->vring.log_guest_addr +
> + offsetof(struct vring_used, idx),
> + sizeof(vq->vring.used->idx));
>
> vq->used_idx = val;
> }
> diff --git a/virtio.h b/virtio.h
> index d95bb07bb913..f572341a0034 100644
> --- a/virtio.h
> +++ b/virtio.h
> @@ -104,6 +104,8 @@ struct vu_dev_region {
> * @features: Vhost-user features
> * @protocol_features: Vhost-user protocol features
> * @log_call_fd: Eventfd to report logging update
> + * @log_size: Size of the logging memory region
> + * @log_table: Base of the logging memory region
> */
> struct vu_dev {
> struct ctx *context;
> @@ -113,6 +115,8 @@ struct vu_dev {
> uint64_t features;
> uint64_t protocol_features;
> int log_call_fd;
> + uint64_t log_size;
> + uint8_t *log_table;
> };
>
> /**
--
Stefano
next prev parent reply other threads:[~2025-01-17 18:05 UTC|newest]
Thread overview: 31+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-12-19 11:13 [PATCH 0/9] vhost-user: Migration support Laurent Vivier
2024-12-19 11:13 ` [PATCH 1/9] virtio: Use const pointer for vu_dev Laurent Vivier
2024-12-20 0:24 ` David Gibson
2025-01-06 8:58 ` Stefano Brivio
2024-12-19 11:13 ` [PATCH 2/9] vhost-user: update protocol features and commands list Laurent Vivier
2025-01-17 18:04 ` Stefano Brivio
2024-12-19 11:13 ` [PATCH 3/9] vhost-user: add VHOST_USER_SET_LOG_FD command Laurent Vivier
2025-01-17 18:04 ` Stefano Brivio
2024-12-19 11:13 ` [PATCH 4/9] vhost-user: Pass vu_dev to more virtio functions Laurent Vivier
2024-12-19 11:13 ` [PATCH 5/9] vhost-user: add VHOST_USER_SET_LOG_BASE command Laurent Vivier
2025-01-17 18:05 ` Stefano Brivio [this message]
2025-01-20 10:57 ` Laurent Vivier
2025-01-17 19:10 ` Stefano Brivio
2024-12-19 11:13 ` [PATCH 6/9] vhost-user: Report to front-end we support VHOST_USER_PROTOCOL_F_LOG_SHMFD Laurent Vivier
2024-12-19 11:13 ` [PATCH 7/9] vhost-user: add VHOST_USER_CHECK_DEVICE_STATE command Laurent Vivier
2024-12-19 11:13 ` [PATCH 8/9] vhost-user: add VHOST_USER_SET_DEVICE_STATE_FD command Laurent Vivier
2024-12-19 19:47 ` Stefano Brivio
2024-12-20 7:56 ` Laurent Vivier
2024-12-20 13:28 ` Stefano Brivio
2025-01-17 18:05 ` Stefano Brivio
2025-01-20 11:00 ` Laurent Vivier
2025-01-20 20:09 ` Stefano Brivio
2024-12-19 11:14 ` [PATCH 9/9] vhost-user: Report to front-end we support VHOST_USER_PROTOCOL_F_DEVICE_STATE Laurent Vivier
2025-01-17 12:13 ` [PATCH 0/9] vhost-user: Migration support Laurent Vivier
2025-01-17 12:44 ` Stefano Brivio
2025-01-17 13:27 ` Laurent Vivier
2025-01-17 13:38 ` Stefano Brivio
2025-01-17 13:58 ` Laurent Vivier
2025-01-17 14:29 ` Stefano Brivio
2025-01-17 13:31 ` Stefano Brivio
2025-01-17 16:51 ` Stefano Brivio
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20250117190502.6590b489@elisabeth \
--to=sbrivio@redhat.com \
--cc=lvivier@redhat.com \
--cc=passt-dev@passt.top \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
Code repositories for project(s) associated with this public inbox
https://passt.top/passt
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for IMAP folder(s).