public inbox for passt-dev@passt.top
 help / color / mirror / code / Atom feed
From: Stefano Brivio <sbrivio@redhat.com>
To: Laurent Vivier <lvivier@redhat.com>
Cc: passt-dev@passt.top
Subject: Re: [PATCH 5/9] vhost-user: add VHOST_USER_SET_LOG_BASE command
Date: Fri, 17 Jan 2025 19:05:02 +0100	[thread overview]
Message-ID: <20250117190502.6590b489@elisabeth> (raw)
In-Reply-To: <20241219111400.2352110-6-lvivier@redhat.com>

On Thu, 19 Dec 2024 12:13:56 +0100
Laurent Vivier <lvivier@redhat.com> wrote:

> Sets logging shared memory space.
> 
> When the back-end has VHOST_USER_PROTOCOL_F_LOG_SHMFD protocol feature,
> the log memory fd is provided in the ancillary data of
> VHOST_USER_SET_LOG_BASE message, the size and offset of shared memory
> area provided in the message.
> 
> Signed-off-by: Laurent Vivier <lvivier@redhat.com>
> ---
>  util.h       |  3 ++
>  vhost_user.c | 87 +++++++++++++++++++++++++++++++++++++++++++++++++++-
>  vhost_user.h |  3 ++
>  virtio.c     | 74 ++++++++++++++++++++++++++++++++++++++++++--
>  virtio.h     |  4 +++
>  5 files changed, 168 insertions(+), 3 deletions(-)
> 
> diff --git a/util.h b/util.h
> index 3fa1d12544a0..d02333d5a88d 100644
> --- a/util.h
> +++ b/util.h
> @@ -152,6 +152,9 @@ static inline void barrier(void) { __asm__ __volatile__("" ::: "memory"); }
>  #define smp_wmb()	smp_mb_release()
>  #define smp_rmb()	smp_mb_acquire()
>  
> +#define qatomic_or(ptr, n) \
> +	((void) __atomic_fetch_or(ptr, n, __ATOMIC_SEQ_CST))
> +
>  #define NS_FN_STACK_SIZE	(1024 * 1024) /* 1MiB */
>  
>  int do_clone(int (*fn)(void *), char *stack_area, size_t stack_size, int flags,
> diff --git a/vhost_user.c b/vhost_user.c
> index ce4373d9eeca..c2fac58badf1 100644
> --- a/vhost_user.c
> +++ b/vhost_user.c
> @@ -510,6 +510,12 @@ static bool vu_set_mem_table_exec(struct vu_dev *vdev,
>   */
>  static void vu_close_log(struct vu_dev *vdev)
>  {
> +	if (vdev->log_table) {
> +		if (munmap(vdev->log_table, vdev->log_size) != 0)
> +			die_perror("close log munmap() error");
> +		vdev->log_table = NULL;
> +	}
> +
>  	if (vdev->log_call_fd != -1) {
>  		close(vdev->log_call_fd);
>  		vdev->log_call_fd = -1;
> @@ -520,7 +526,6 @@ static void vu_close_log(struct vu_dev *vdev)
>   * vu_log_kick() - Inform the front-end that the log has been modified
>   * @vdev:	vhost-user device
>   */
> -/* cppcheck-suppress unusedFunction */
>  void vu_log_kick(const struct vu_dev *vdev)
>  {
>  	if (vdev->log_call_fd != -1) {
> @@ -532,6 +537,84 @@ void vu_log_kick(const struct vu_dev *vdev)
>  	}
>  }
>  
> +
> +/**

Excess newline.

> + * vu_log_page() -- Update logging table

Single '-' between function name and comment.

> + * @log_table:	Base address of the logging table
> + * @page:	Page number that has been updated
> + */
> +/* NOLINTNEXTLINE(readability-non-const-parameter) */
> +static void vu_log_page(uint8_t *log_table, uint64_t page)
> +{
> +	qatomic_or(&log_table[page / 8], 1 << (page % 8));
> +}
> +
> +/**
> + * vu_log_write() -- Log memory write

Single '-' between function name and comment.

> + * @dev:	Vhost-user device

vhost-user

> + * @address:	Memory address
> + * @length:	Memory size
> + */
> +void vu_log_write(const struct vu_dev *vdev, uint64_t address, uint64_t length)
> +{
> +	uint64_t page;
> +
> +	if (!vdev->log_table || !length ||
> +	    !vu_has_feature(vdev, VHOST_F_LOG_ALL))
> +		return;
> +
> +	page = address / VHOST_LOG_PAGE;
> +	while (page * VHOST_LOG_PAGE < address + length) {
> +		vu_log_page(vdev->log_table, page);
> +		page++;
> +	}
> +	vu_log_kick(vdev);
> +}
> +
> +/**
> + * vu_set_log_base_exec() - Set the memory log base
> + * @vdev:	vhost-user device
> + * @vmsg:	vhost-user message
> + *
> + * Return: False as no reply is requested
> + *
> + * #syscalls:vu mmap munmap

I wonder: will there be a way around this the day that we want to
disable mmap() for vhost-user mode too?

> + */
> +static bool vu_set_log_base_exec(struct vu_dev *vdev,
> +				 struct vhost_user_msg *msg)
> +{
> +	uint64_t log_mmap_size, log_mmap_offset;
> +	void *base;
> +	int fd;
> +
> +	if (msg->fd_num != 1 || msg->hdr.size != sizeof(msg->payload.log))
> +		die("Invalid log_base message");

Maybe prefix this with "vhost-user:", otherwise it's not really clear
where it's coming from.

> +
> +	fd = msg->fds[0];
> +	log_mmap_offset = msg->payload.log.mmap_offset;
> +	log_mmap_size = msg->payload.log.mmap_size;
> +
> +	debug("Log mmap_offset: %"PRId64, log_mmap_offset);
> +	debug("Log mmap_size:   %"PRId64, log_mmap_size);
> +
> +	base = mmap(0, log_mmap_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd,
> +		    log_mmap_offset);
> +	close(fd);
> +	if (base == MAP_FAILED)
> +		die("log mmap error");

Same here.

> +
> +	if (vdev->log_table)
> +		munmap(vdev->log_table, vdev->log_size);
> +
> +	vdev->log_table = base;
> +	vdev->log_size = log_mmap_size;
> +
> +	msg->hdr.size = sizeof(msg->payload.u64);
> +	msg->fd_num = 0;
> +
> +	return true;
> +}
> +
>  /**
>   * vu_set_log_fd_exec() -- Set the eventfd used to report logging update
>   * @vdev:	vhost-user device
> @@ -915,6 +998,7 @@ void vu_init(struct ctx *c)
>  			.notification = true,
>  		};
>  	}
> +	c->vdev->log_table = NULL;
>  	c->vdev->log_call_fd = -1;
>  }
>  
> @@ -984,6 +1068,7 @@ static bool (*vu_handle[VHOST_USER_MAX])(struct vu_dev *vdev,
>  	[VHOST_USER_GET_QUEUE_NUM]	   = vu_get_queue_num_exec,
>  	[VHOST_USER_SET_OWNER]		   = vu_set_owner_exec,
>  	[VHOST_USER_SET_MEM_TABLE]	   = vu_set_mem_table_exec,
> +	[VHOST_USER_SET_LOG_BASE]	   = vu_set_log_base_exec,
>  	[VHOST_USER_SET_LOG_FD]		   = vu_set_log_fd_exec,
>  	[VHOST_USER_SET_VRING_NUM]	   = vu_set_vring_num_exec,
>  	[VHOST_USER_SET_VRING_ADDR]	   = vu_set_vring_addr_exec,
> diff --git a/vhost_user.h b/vhost_user.h
> index 2fc0342ff5ba..22a5d059073f 100644
> --- a/vhost_user.h
> +++ b/vhost_user.h
> @@ -15,6 +15,7 @@
>  #include "iov.h"
>  
>  #define VHOST_USER_F_PROTOCOL_FEATURES 30
> +#define VHOST_LOG_PAGE 4096

Does this need to be 65536 on ppc64 and ppc64le? If so, we have
PAGE_SIZE exported by the Makefile (it uses 'getconf', so it's not
cross-build-safe; we should find a better way eventually).

>  
>  #define VHOST_MEMORY_BASELINE_NREGIONS 8
>  
> @@ -241,5 +242,7 @@ void vu_print_capabilities(void);
>  void vu_init(struct ctx *c);
>  void vu_cleanup(struct vu_dev *vdev);
>  void vu_log_kick(const struct vu_dev *vdev);
> +void vu_log_write(const struct vu_dev *vdev, uint64_t address,
> +		  uint64_t length);
>  void vu_control_handler(struct vu_dev *vdev, int fd, uint32_t events);
>  #endif /* VHOST_USER_H */
> diff --git a/virtio.c b/virtio.c
> index 52d5a4d4be52..13838586ad1a 100644
> --- a/virtio.c
> +++ b/virtio.c
> @@ -81,6 +81,7 @@
>  
>  #include "util.h"
>  #include "virtio.h"
> +#include "vhost_user.h"
>  
>  #define VIRTQUEUE_MAX_SIZE 1024
>  
> @@ -592,7 +593,72 @@ static inline void vring_used_write(const struct vu_dev *vdev,
>  	struct vring_used *used = vq->vring.used;
>  
>  	used->ring[i] = *uelem;
> -	(void)vdev;
> +	vu_log_write(vdev, vq->vring.log_guest_addr +
> +		     offsetof(struct vring_used, ring[i]),
> +		     sizeof(used->ring[i]));
> +}
> +
> +/**
> + * vu_log_queue_fill() -- Log virtqueue memory update

Single '-' between function name and comment.

> + * @dev:	Vhost-user device

vhost-user

> + * @vq:		Virtqueue
> + * @index:	Descriptor ring index
> + * @len:	Size of the element
> + */
> +static void vu_log_queue_fill(const struct vu_dev *vdev, struct vu_virtq *vq,
> +			      unsigned int index, unsigned int len)
> +{
> +	struct vring_desc desc_buf[VIRTQUEUE_MAX_SIZE];
> +	struct vring_desc *desc = vq->vring.desc;
> +	unsigned int max, min;
> +	unsigned num_bufs = 0;
> +	uint64_t read_len;
> +
> +	if (!vdev->log_table || !len || !vu_has_feature(vdev, VHOST_F_LOG_ALL))
> +		return;
> +
> +	max = vq->vring.num;
> +
> +	if (le16toh(desc[index].flags) & VRING_DESC_F_INDIRECT) {
> +		unsigned int desc_len;
> +		uint64_t desc_addr;
> +
> +		if (le32toh(desc[index].len) % sizeof(struct vring_desc))
> +			die("Invalid size for indirect buffer table");
> +
> +		/* loop over the indirect descriptor table */
> +		desc_addr = le64toh(desc[index].addr);
> +		desc_len = le32toh(desc[index].len);
> +		max = desc_len / sizeof(struct vring_desc);
> +		read_len = desc_len;
> +		desc = vu_gpa_to_va(vdev, &read_len, desc_addr);
> +		if (desc && read_len != desc_len) {
> +			/* Failed to use zero copy */

Follow-up on the question above: could we skip mmap() if we used only
this path?

> +			desc = NULL;
> +			if (!virtqueue_read_indirect_desc(vdev, desc_buf,
> +							  desc_addr,
> +							  desc_len))
> +				desc = desc_buf;
> +		}
> +
> +		if (!desc)
> +			die("Invalid indirect buffer table");
> +
> +		index = 0;
> +	}
> +
> +	do {
> +		if (++num_bufs > max)
> +			die("Looped descriptor");
> +
> +		if (le16toh(desc[index].flags) & VRING_DESC_F_WRITE) {
> +			min = MIN(le32toh(desc[index].len), len);
> +			vu_log_write(vdev, le64toh(desc[index].addr), min);
> +			len -= min;
> +		}
> +	} while (len > 0 &&
> +		 (virtqueue_read_next_desc(desc, index, max, &index) ==
> +		  VIRTQUEUE_READ_DESC_MORE));

It's a bit weird that we could get a negative length because of the
do { } while. That is:

	while (len > 0) {
		...
		if (virtqueue_read_next_desc(desc, index, max, &index) !=
		    VIRTQUEUE_READ_DESC_MORE)
			break;
	}

would have looked more natural/safer to me. But perhaps there's a reason
for that.

>  }
>  
>  
> @@ -614,6 +680,8 @@ void vu_queue_fill_by_index(const struct vu_dev *vdev, struct vu_virtq *vq,
>  	if (!vq->vring.avail)
>  		return;
>  
> +	vu_log_queue_fill(vdev, vq, index, len);
> +
>  	idx = (idx + vq->used_idx) % vq->vring.num;
>  
>  	uelem.id = htole32(index);
> @@ -646,7 +714,9 @@ static inline void vring_used_idx_set(const struct vu_dev *vdev,
>  				      struct vu_virtq *vq, uint16_t val)
>  {
>  	vq->vring.used->idx = htole16(val);
> -	(void)vdev;
> +	vu_log_write(vdev, vq->vring.log_guest_addr +
> +		     offsetof(struct vring_used, idx),
> +		     sizeof(vq->vring.used->idx));
>  
>  	vq->used_idx = val;
>  }
> diff --git a/virtio.h b/virtio.h
> index d95bb07bb913..f572341a0034 100644
> --- a/virtio.h
> +++ b/virtio.h
> @@ -104,6 +104,8 @@ struct vu_dev_region {
>   * @features:		Vhost-user features
>   * @protocol_features:	Vhost-user protocol features
>   * @log_call_fd:	Eventfd to report logging update
> + * @log_size:		Size of the logging memory region
> + * @log_table:		Base of the logging memory region
>   */
>  struct vu_dev {
>  	struct ctx *context;
> @@ -113,6 +115,8 @@ struct vu_dev {
>  	uint64_t features;
>  	uint64_t protocol_features;
>  	int log_call_fd;
> +	uint64_t log_size;
> +	uint8_t *log_table;
>  };
>  
>  /**

-- 
Stefano


  reply	other threads:[~2025-01-17 18:05 UTC|newest]

Thread overview: 31+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-12-19 11:13 [PATCH 0/9] vhost-user: Migration support Laurent Vivier
2024-12-19 11:13 ` [PATCH 1/9] virtio: Use const pointer for vu_dev Laurent Vivier
2024-12-20  0:24   ` David Gibson
2025-01-06  8:58     ` Stefano Brivio
2024-12-19 11:13 ` [PATCH 2/9] vhost-user: update protocol features and commands list Laurent Vivier
2025-01-17 18:04   ` Stefano Brivio
2024-12-19 11:13 ` [PATCH 3/9] vhost-user: add VHOST_USER_SET_LOG_FD command Laurent Vivier
2025-01-17 18:04   ` Stefano Brivio
2024-12-19 11:13 ` [PATCH 4/9] vhost-user: Pass vu_dev to more virtio functions Laurent Vivier
2024-12-19 11:13 ` [PATCH 5/9] vhost-user: add VHOST_USER_SET_LOG_BASE command Laurent Vivier
2025-01-17 18:05   ` Stefano Brivio [this message]
2025-01-20 10:57     ` Laurent Vivier
2025-01-17 19:10   ` Stefano Brivio
2024-12-19 11:13 ` [PATCH 6/9] vhost-user: Report to front-end we support VHOST_USER_PROTOCOL_F_LOG_SHMFD Laurent Vivier
2024-12-19 11:13 ` [PATCH 7/9] vhost-user: add VHOST_USER_CHECK_DEVICE_STATE command Laurent Vivier
2024-12-19 11:13 ` [PATCH 8/9] vhost-user: add VHOST_USER_SET_DEVICE_STATE_FD command Laurent Vivier
2024-12-19 19:47   ` Stefano Brivio
2024-12-20  7:56     ` Laurent Vivier
2024-12-20 13:28       ` Stefano Brivio
2025-01-17 18:05   ` Stefano Brivio
2025-01-20 11:00     ` Laurent Vivier
2025-01-20 20:09       ` Stefano Brivio
2024-12-19 11:14 ` [PATCH 9/9] vhost-user: Report to front-end we support VHOST_USER_PROTOCOL_F_DEVICE_STATE Laurent Vivier
2025-01-17 12:13 ` [PATCH 0/9] vhost-user: Migration support Laurent Vivier
2025-01-17 12:44   ` Stefano Brivio
2025-01-17 13:27     ` Laurent Vivier
2025-01-17 13:38       ` Stefano Brivio
2025-01-17 13:58         ` Laurent Vivier
2025-01-17 14:29           ` Stefano Brivio
2025-01-17 13:31     ` Stefano Brivio
2025-01-17 16:51 ` Stefano Brivio

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20250117190502.6590b489@elisabeth \
    --to=sbrivio@redhat.com \
    --cc=lvivier@redhat.com \
    --cc=passt-dev@passt.top \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link.

  Be sure your reply has a Subject: header at the top and a blank line
  before the message body.
Code repositories for project(s) associated with this public inbox

	https://passt.top/passt

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for IMAP folder(s).