From: Laurent Vivier <lvivier@redhat.com>
To: Stefano Brivio <sbrivio@redhat.com>
Cc: passt-dev@passt.top
Subject: Re: [PATCH 5/9] vhost-user: add VHOST_USER_SET_LOG_BASE command
Date: Mon, 20 Jan 2025 11:57:28 +0100 [thread overview]
Message-ID: <d02007ca-0aa0-44f6-9e0b-b267eb016e96@redhat.com> (raw)
In-Reply-To: <20250117190502.6590b489@elisabeth>
On 17/01/2025 19:05, Stefano Brivio wrote:
> On Thu, 19 Dec 2024 12:13:56 +0100
> Laurent Vivier <lvivier@redhat.com> wrote:
>
>> Sets logging shared memory space.
>>
>> When the back-end has VHOST_USER_PROTOCOL_F_LOG_SHMFD protocol feature,
>> the log memory fd is provided in the ancillary data of
>> VHOST_USER_SET_LOG_BASE message, the size and offset of shared memory
>> area provided in the message.
>>
>> Signed-off-by: Laurent Vivier <lvivier@redhat.com>
>> ---
>> util.h | 3 ++
>> vhost_user.c | 87 +++++++++++++++++++++++++++++++++++++++++++++++++++-
>> vhost_user.h | 3 ++
>> virtio.c | 74 ++++++++++++++++++++++++++++++++++++++++++--
>> virtio.h | 4 +++
>> 5 files changed, 168 insertions(+), 3 deletions(-)
>>
>> diff --git a/util.h b/util.h
>> index 3fa1d12544a0..d02333d5a88d 100644
>> --- a/util.h
>> +++ b/util.h
>> @@ -152,6 +152,9 @@ static inline void barrier(void) { __asm__ __volatile__("" ::: "memory"); }
>> #define smp_wmb() smp_mb_release()
>> #define smp_rmb() smp_mb_acquire()
>>
>> +#define qatomic_or(ptr, n) \
>> + ((void) __atomic_fetch_or(ptr, n, __ATOMIC_SEQ_CST))
>> +
>> #define NS_FN_STACK_SIZE (1024 * 1024) /* 1MiB */
>>
>> int do_clone(int (*fn)(void *), char *stack_area, size_t stack_size, int flags,
>> diff --git a/vhost_user.c b/vhost_user.c
>> index ce4373d9eeca..c2fac58badf1 100644
>> --- a/vhost_user.c
>> +++ b/vhost_user.c
>> @@ -510,6 +510,12 @@ static bool vu_set_mem_table_exec(struct vu_dev *vdev,
>> */
>> static void vu_close_log(struct vu_dev *vdev)
>> {
>> + if (vdev->log_table) {
>> + if (munmap(vdev->log_table, vdev->log_size) != 0)
>> + die_perror("close log munmap() error");
>> + vdev->log_table = NULL;
>> + }
>> +
>> if (vdev->log_call_fd != -1) {
>> close(vdev->log_call_fd);
>> vdev->log_call_fd = -1;
>> @@ -520,7 +526,6 @@ static void vu_close_log(struct vu_dev *vdev)
>> * vu_log_kick() - Inform the front-end that the log has been modified
>> * @vdev: vhost-user device
>> */
>> -/* cppcheck-suppress unusedFunction */
>> void vu_log_kick(const struct vu_dev *vdev)
>> {
>> if (vdev->log_call_fd != -1) {
>> @@ -532,6 +537,84 @@ void vu_log_kick(const struct vu_dev *vdev)
>> }
>> }
>>
>> +
>> +/**
>
> Excess newline.
>
>> + * vu_log_page() -- Update logging table
>
> Single '-' between function name and comment.
>
>> + * @log_table: Base address of the logging table
>> + * @page: Page number that has been updated
>> + */
>> +/* NOLINTNEXTLINE(readability-non-const-parameter) */
>> +static void vu_log_page(uint8_t *log_table, uint64_t page)
>> +{
>> + qatomic_or(&log_table[page / 8], 1 << (page % 8));
>> +}
>> +
>> +/**
>> + * vu_log_write() -- Log memory write
>
> Single '-' between function name and comment.
>
>> + * @dev: Vhost-user device
>
> vhost-user
>
>> + * @address: Memory address
>> + * @length: Memory size
>> + */
>> +void vu_log_write(const struct vu_dev *vdev, uint64_t address, uint64_t length)
>> +{
>> + uint64_t page;
>> +
>> + if (!vdev->log_table || !length ||
>> + !vu_has_feature(vdev, VHOST_F_LOG_ALL))
>> + return;
>> +
>> + page = address / VHOST_LOG_PAGE;
>> + while (page * VHOST_LOG_PAGE < address + length) {
>> + vu_log_page(vdev->log_table, page);
>> + page++;
>> + }
>> + vu_log_kick(vdev);
>> +}
>> +
>> +/**
>> + * vu_set_log_base_exec() - Set the memory log base
>> + * @vdev: vhost-user device
>> + * @vmsg: vhost-user message
>> + *
>> + * Return: False as no reply is requested
>> + *
>> + * #syscalls:vu mmap munmap
>
> I wonder: will there be a way around this the day that we want to
> disable mmap() for vhost-user mode too?
I don't think we can bypass the use of mmap.
>
>> + */
>> +static bool vu_set_log_base_exec(struct vu_dev *vdev,
>> + struct vhost_user_msg *msg)
>> +{
>> + uint64_t log_mmap_size, log_mmap_offset;
>> + void *base;
>> + int fd;
>> +
>> + if (msg->fd_num != 1 || msg->hdr.size != sizeof(msg->payload.log))
>> + die("Invalid log_base message");
>
> Maybe prefix this with "vhost-user:", otherwise it's not really clear
> where it's coming from.
>
>> +
>> + fd = msg->fds[0];
>> + log_mmap_offset = msg->payload.log.mmap_offset;
>> + log_mmap_size = msg->payload.log.mmap_size;
>> +
>> + debug("Log mmap_offset: %"PRId64, log_mmap_offset);
>> + debug("Log mmap_size: %"PRId64, log_mmap_size);
>> +
>> + base = mmap(0, log_mmap_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd,
>> + log_mmap_offset);
>> + close(fd);
>> + if (base == MAP_FAILED)
>> + die("log mmap error");
>
> Same here.
>
>> +
>> + if (vdev->log_table)
>> + munmap(vdev->log_table, vdev->log_size);
>> +
>> + vdev->log_table = base;
>> + vdev->log_size = log_mmap_size;
>> +
>> + msg->hdr.size = sizeof(msg->payload.u64);
>> + msg->fd_num = 0;
>> +
>> + return true;
>> +}
>> +
>> /**
>> * vu_set_log_fd_exec() -- Set the eventfd used to report logging update
>> * @vdev: vhost-user device
>> @@ -915,6 +998,7 @@ void vu_init(struct ctx *c)
>> .notification = true,
>> };
>> }
>> + c->vdev->log_table = NULL;
>> c->vdev->log_call_fd = -1;
>> }
>>
>> @@ -984,6 +1068,7 @@ static bool (*vu_handle[VHOST_USER_MAX])(struct vu_dev *vdev,
>> [VHOST_USER_GET_QUEUE_NUM] = vu_get_queue_num_exec,
>> [VHOST_USER_SET_OWNER] = vu_set_owner_exec,
>> [VHOST_USER_SET_MEM_TABLE] = vu_set_mem_table_exec,
>> + [VHOST_USER_SET_LOG_BASE] = vu_set_log_base_exec,
>> [VHOST_USER_SET_LOG_FD] = vu_set_log_fd_exec,
>> [VHOST_USER_SET_VRING_NUM] = vu_set_vring_num_exec,
>> [VHOST_USER_SET_VRING_ADDR] = vu_set_vring_addr_exec,
>> diff --git a/vhost_user.h b/vhost_user.h
>> index 2fc0342ff5ba..22a5d059073f 100644
>> --- a/vhost_user.h
>> +++ b/vhost_user.h
>> @@ -15,6 +15,7 @@
>> #include "iov.h"
>>
>> #define VHOST_USER_F_PROTOCOL_FEATURES 30
>> +#define VHOST_LOG_PAGE 4096
>
> Does this need to be 65536 on ppc64 and ppc64le? In case, we have
> PAGE_SIZE exported by the Makefile in (it uses 'getconf' so it's not
> cross-build-safe, we should find a better way eventually).
VHOST_LOG_PAGE is defined as 0x1000 and does not depend on the architecture type.
https://qemu-project.gitlab.io/qemu/interop/vhost-user.html#migration
>
>>
>> #define VHOST_MEMORY_BASELINE_NREGIONS 8
>>
>> @@ -241,5 +242,7 @@ void vu_print_capabilities(void);
>> void vu_init(struct ctx *c);
>> void vu_cleanup(struct vu_dev *vdev);
>> void vu_log_kick(const struct vu_dev *vdev);
>> +void vu_log_write(const struct vu_dev *vdev, uint64_t address,
>> + uint64_t length);
>> void vu_control_handler(struct vu_dev *vdev, int fd, uint32_t events);
>> #endif /* VHOST_USER_H */
>> diff --git a/virtio.c b/virtio.c
>> index 52d5a4d4be52..13838586ad1a 100644
>> --- a/virtio.c
>> +++ b/virtio.c
>> @@ -81,6 +81,7 @@
>>
>> #include "util.h"
>> #include "virtio.h"
>> +#include "vhost_user.h"
>>
>> #define VIRTQUEUE_MAX_SIZE 1024
>>
>> @@ -592,7 +593,72 @@ static inline void vring_used_write(const struct vu_dev *vdev,
>> struct vring_used *used = vq->vring.used;
>>
>> used->ring[i] = *uelem;
>> - (void)vdev;
>> + vu_log_write(vdev, vq->vring.log_guest_addr +
>> + offsetof(struct vring_used, ring[i]),
>> + sizeof(used->ring[i]));
>> +}
>> +
>> +/**
>> + * vu_log_queue_fill() -- Log virtqueue memory update
>
> Single '-' between function name and comment.
>
>> + * @dev: Vhost-user device
>
> vhost-user
>
>> + * @vq: Virtqueue
>> + * @index: Descriptor ring index
>> + * @len: Size of the element
>> + */
>> +static void vu_log_queue_fill(const struct vu_dev *vdev, struct vu_virtq *vq,
>> + unsigned int index, unsigned int len)
>> +{
>> + struct vring_desc desc_buf[VIRTQUEUE_MAX_SIZE];
>> + struct vring_desc *desc = vq->vring.desc;
>> + unsigned int max, min;
>> + unsigned num_bufs = 0;
>> + uint64_t read_len;
>> +
>> + if (!vdev->log_table || !len || !vu_has_feature(vdev, VHOST_F_LOG_ALL))
>> + return;
>> +
>> + max = vq->vring.num;
>> +
>> + if (le16toh(desc[index].flags) & VRING_DESC_F_INDIRECT) {
>> + unsigned int desc_len;
>> + uint64_t desc_addr;
>> +
>> + if (le32toh(desc[index].len) % sizeof(struct vring_desc))
>> + die("Invalid size for indirect buffer table");
>> +
>> + /* loop over the indirect descriptor table */
>> + desc_addr = le64toh(desc[index].addr);
>> + desc_len = le32toh(desc[index].len);
>> + max = desc_len / sizeof(struct vring_desc);
>> + read_len = desc_len;
>> + desc = vu_gpa_to_va(vdev, &read_len, desc_addr);
>> + if (desc && read_len != desc_len) {
>> + /* Failed to use zero copy */
>
> Follow-up on the question above: could we skip mmap() if we used only
> this path?
We need to access guest memory, so no.
>
>> + desc = NULL;
>> + if (!virtqueue_read_indirect_desc(vdev, desc_buf,
>> + desc_addr,
>> + desc_len))
>> + desc = desc_buf;
>> + }
>> +
>> + if (!desc)
>> + die("Invalid indirect buffer table");
>> +
>> + index = 0;
>> + }
>> +
>> + do {
>> + if (++num_bufs > max)
>> + die("Looped descriptor");
>> +
>> + if (le16toh(desc[index].flags) & VRING_DESC_F_WRITE) {
>> + min = MIN(le32toh(desc[index].len), len);
>> + vu_log_write(vdev, le64toh(desc[index].addr), min);
>> + len -= min;
>> + }
>> + } while (len > 0 &&
>> + (virtqueue_read_next_desc(desc, index, max, &index) ==
>> + VIRTQUEUE_READ_DESC_MORE));
>
> It's a bit weird that we could get a negative length because of the
> do { } while. That is:
>
> while (len > 0) {
> ...
> if (virtqueue_read_next_desc(desc, index, max, &index) !=
> VIRTQUEUE_READ_DESC_MORE))
> break;
> }
>
> would have looked more natural/safer to me. But perhaps there's a reason
> for that.
In fact, it's copied from QEMU, and I didn't want to change the code.
>> }
>>
>>
>> @@ -614,6 +680,8 @@ void vu_queue_fill_by_index(const struct vu_dev *vdev, struct vu_virtq *vq,
>> if (!vq->vring.avail)
>> return;
>>
>> + vu_log_queue_fill(vdev, vq, index, len);
>> +
>> idx = (idx + vq->used_idx) % vq->vring.num;
>>
>> uelem.id = htole32(index);
>> @@ -646,7 +714,9 @@ static inline void vring_used_idx_set(const struct vu_dev *vdev,
>> struct vu_virtq *vq, uint16_t val)
>> {
>> vq->vring.used->idx = htole16(val);
>> - (void)vdev;
>> + vu_log_write(vdev, vq->vring.log_guest_addr +
>> + offsetof(struct vring_used, idx),
>> + sizeof(vq->vring.used->idx));
>>
>> vq->used_idx = val;
>> }
>> diff --git a/virtio.h b/virtio.h
>> index d95bb07bb913..f572341a0034 100644
>> --- a/virtio.h
>> +++ b/virtio.h
>> @@ -104,6 +104,8 @@ struct vu_dev_region {
>> * @features: Vhost-user features
>> * @protocol_features: Vhost-user protocol features
>> * @log_call_fd: Eventfd to report logging update
>> + * @log_size: Size of the logging memory region
>> + * @log_table: Base of the logging memory region
>> */
>> struct vu_dev {
>> struct ctx *context;
>> @@ -113,6 +115,8 @@ struct vu_dev {
>> uint64_t features;
>> uint64_t protocol_features;
>> int log_call_fd;
>> + uint64_t log_size;
>> + uint8_t *log_table;
>> };
>>
>> /**
>
Thanks,
Laurent
next prev parent reply other threads:[~2025-01-20 10:57 UTC|newest]
Thread overview: 31+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-12-19 11:13 [PATCH 0/9] vhost-user: Migration support Laurent Vivier
2024-12-19 11:13 ` [PATCH 1/9] virtio: Use const pointer for vu_dev Laurent Vivier
2024-12-20 0:24 ` David Gibson
2025-01-06 8:58 ` Stefano Brivio
2024-12-19 11:13 ` [PATCH 2/9] vhost-user: update protocol features and commands list Laurent Vivier
2025-01-17 18:04 ` Stefano Brivio
2024-12-19 11:13 ` [PATCH 3/9] vhost-user: add VHOST_USER_SET_LOG_FD command Laurent Vivier
2025-01-17 18:04 ` Stefano Brivio
2024-12-19 11:13 ` [PATCH 4/9] vhost-user: Pass vu_dev to more virtio functions Laurent Vivier
2024-12-19 11:13 ` [PATCH 5/9] vhost-user: add VHOST_USER_SET_LOG_BASE command Laurent Vivier
2025-01-17 18:05 ` Stefano Brivio
2025-01-20 10:57 ` Laurent Vivier [this message]
2025-01-17 19:10 ` Stefano Brivio
2024-12-19 11:13 ` [PATCH 6/9] vhost-user: Report to front-end we support VHOST_USER_PROTOCOL_F_LOG_SHMFD Laurent Vivier
2024-12-19 11:13 ` [PATCH 7/9] vhost-user: add VHOST_USER_CHECK_DEVICE_STATE command Laurent Vivier
2024-12-19 11:13 ` [PATCH 8/9] vhost-user: add VHOST_USER_SET_DEVICE_STATE_FD command Laurent Vivier
2024-12-19 19:47 ` Stefano Brivio
2024-12-20 7:56 ` Laurent Vivier
2024-12-20 13:28 ` Stefano Brivio
2025-01-17 18:05 ` Stefano Brivio
2025-01-20 11:00 ` Laurent Vivier
2025-01-20 20:09 ` Stefano Brivio
2024-12-19 11:14 ` [PATCH 9/9] vhost-user: Report to front-end we support VHOST_USER_PROTOCOL_F_DEVICE_STATE Laurent Vivier
2025-01-17 12:13 ` [PATCH 0/9] vhost-user: Migration support Laurent Vivier
2025-01-17 12:44 ` Stefano Brivio
2025-01-17 13:27 ` Laurent Vivier
2025-01-17 13:38 ` Stefano Brivio
2025-01-17 13:58 ` Laurent Vivier
2025-01-17 14:29 ` Stefano Brivio
2025-01-17 13:31 ` Stefano Brivio
2025-01-17 16:51 ` Stefano Brivio
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=d02007ca-0aa0-44f6-9e0b-b267eb016e96@redhat.com \
--to=lvivier@redhat.com \
--cc=passt-dev@passt.top \
--cc=sbrivio@redhat.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
Code repositories for project(s) associated with this public inbox
https://passt.top/passt
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for IMAP folder(s).