From: David Gibson <david@gibson.dropbear.id.au>
To: Laurent Vivier <lvivier@redhat.com>
Cc: passt-dev@passt.top
Subject: Re: [PATCH v5 02/12] vhost-user: Advertise multiqueue support
Date: Fri, 19 Jun 2026 15:17:53 +1000 [thread overview]
Message-ID: <ajTRAdY3qECRUN0A@zatzit> (raw)
In-Reply-To: <20260616125130.1324274-3-lvivier@redhat.com>
[-- Attachment #1: Type: text/plain, Size: 11882 bytes --]
On Tue, Jun 16, 2026 at 02:51:20PM +0200, Laurent Vivier wrote:
> Allow the guest to negotiate multiple virtqueue pairs by advertising
> VIRTIO_NET_F_MQ and VHOST_USER_PROTOCOL_F_MQ feature flags, and
> increase VHOST_USER_MAX_VQS from 2 to 32, supporting up to 16 queue
> pairs.
>
> Replace the VHOST_USER_RX_QUEUE, VHOST_USER_TX_QUEUE,
> VHOST_USER_IS_QUEUE_TX(), and VHOST_USER_IS_QUEUE_RX() macros with a
> general set of QPAIR_* macros in passt.h that translate between queue
> pair numbers and virtqueue indices. These are needed now that queue
> indices are no longer limited to 0 and 1.
>
> Add a queue pair parameter to vu_send_single(), propagating it to the
> virtqueue selection. All callers currently pass QPAIR_DEFAULT (0):
> only the first RX queue is used for receiving. The guest kernel
> selects which TX queue to use for transmission. Full multi-RX-queue
> load balancing will be implemented separately.
>
> Signed-off-by: Laurent Vivier <lvivier@redhat.com>
A handful of comments, but they're all more or less cosmetic.
> ---
> passt.h | 8 ++++++++
> tap.c | 9 ++++++---
> tcp_vu.c | 7 +++++--
> udp_vu.c | 3 ++-
> vhost_user.c | 14 +++++++++-----
> vhost_user.h | 9 ---------
> virtio.h | 2 +-
> vu_common.c | 17 +++++++++++------
> vu_common.h | 3 ++-
> 9 files changed, 44 insertions(+), 28 deletions(-)
>
> diff --git a/passt.h b/passt.h
> index 07126a969551..bf8a1e037317 100644
> --- a/passt.h
> +++ b/passt.h
> @@ -28,6 +28,14 @@ union epoll_ref;
> #include "udp.h"
> #include "vhost_user.h"
>
> +/* Queue pairs consist of one RX queue (even index) and one TX queue (odd index).
> + * Example: pair 0 has RX queue 0 and TX queue 1; pair 1 has RX queue 2 and TX queue 3.
> + */
> +#define QPAIR_DEFAULT 0 /* Default queue pair */
> +#define QPAIR_FROMGUEST_QUEUE(qpair) ((size_t)(qpair) * 2 + 1) /* TX queue index from pair */
> +#define QPAIR_TOGUEST_QUEUE(qpair) ((size_t)(qpair) * 2) /* RX queue index from pair */
> +#define QPAIR_FROM_QUEUE(queue) ((queue) / 2) /* Extract pair from queue */
> +
> /* Default address for our end on the tap interface. Bit 0 of byte 0 must be 0
> * (unicast) and bit 1 of byte 1 must be 1 (locally administered). Otherwise
> * it's arbitrary.
> diff --git a/tap.c b/tap.c
> index 4699c5ef9177..a5d22088424f 100644
> --- a/tap.c
> +++ b/tap.c
> @@ -155,7 +155,7 @@ void tap_send_single(const struct ctx *c, const void *data, size_t l2len)
> tap_send_frames(c, iov, iovcnt, 1);
> break;
> case MODE_VU:
> - vu_send_single(c, data, l2len);
> + vu_send_single(c, QPAIR_DEFAULT, data, l2len);
> break;
> }
> }
> @@ -1379,8 +1379,11 @@ static void tap_backend_show_hints(const struct ctx *c)
> break;
> case MODE_VU:
> info("You can start qemu with:");
> - info(" kvm ... -chardev socket,id=chr0,path=%s -netdev vhost-user,id=netdev0,chardev=chr0 -device virtio-net,netdev=netdev0 -object memory-backend-memfd,id=memfd0,share=on,size=$RAMSIZE -numa node,memdev=memfd0\n",
> - c->sock_path);
> + info(" kvm ... -chardev socket,id=chr0,path=%s "
> + "-netdev vhost-user,id=netdev0,chardev=chr0,queues=$QUEUES "
> + "-device virtio-net,netdev=netdev0,mq=true "
> + "-object memory-backend-memfd,id=memfd0,share=on,size=$RAMSIZE "
> + "-numa node,memdev=memfd0\n", c->sock_path);
> break;
> }
> }
> diff --git a/tcp_vu.c b/tcp_vu.c
> index 7e2a7dbc81e1..9ef6b5242c9c 100644
> --- a/tcp_vu.c
> +++ b/tcp_vu.c
> @@ -124,7 +124,8 @@ static int tcp_vu_send_dup(const struct ctx *c, struct vu_virtq *vq,
> int tcp_vu_send_flag(const struct ctx *c, struct tcp_tap_conn *conn, int flags)
> {
> struct vu_dev *vdev = c->vdev;
> - struct vu_virtq *vq = &vdev->vq[VHOST_USER_RX_QUEUE];
> + int rx_queue = QPAIR_TOGUEST_QUEUE(QPAIR_DEFAULT);
Nit: I like the "toguest" terminology in the macro, but the variable
name still uses the somewhat confusing "rx" term. Since this is a
strictly to-guest function, would just "queue" or "qindex" do?
> + struct vu_virtq *vq = &vdev->vq[rx_queue];
> size_t optlen, hdrlen, iov_cnt, iov_used;
> struct vu_virtq_element flags_elem[2];
> struct iov_tail payload, l2frame;
> @@ -429,8 +430,10 @@ static void tcp_vu_prepare(const struct ctx *c, struct tcp_tap_conn *conn,
> int tcp_vu_data_from_sock(const struct ctx *c, struct tcp_tap_conn *conn)
> {
> uint32_t wnd_scaled = conn->wnd_from_tap << conn->ws_from_tap;
> + unsigned int qpair = QPAIR_DEFAULT;
> + int rx_queue = QPAIR_TOGUEST_QUEUE(qpair);
Ditto.
> struct vu_dev *vdev = c->vdev;
> - struct vu_virtq *vq = &vdev->vq[VHOST_USER_RX_QUEUE];
> + struct vu_virtq *vq = &vdev->vq[rx_queue];
> uint32_t already_sent, check;
> ssize_t len, previous_dlen;
> int i, elem_cnt, frame_cnt;
> diff --git a/udp_vu.c b/udp_vu.c
> index e4fb105730bf..b1a8ad76a691 100644
> --- a/udp_vu.c
> +++ b/udp_vu.c
> @@ -147,8 +147,9 @@ void udp_vu_sock_to_tap(const struct ctx *c, int s, int n, flow_sidx_t tosidx)
> bool v6 = !(inany_v4(&toside->eaddr) && inany_v4(&toside->oaddr));
> static struct vu_virtq_element elem[VIRTQUEUE_MAX_SIZE];
> static struct iovec iov_vu[VIRTQUEUE_MAX_SIZE];
> + int rx_queue = QPAIR_TOGUEST_QUEUE(QPAIR_DEFAULT);
Ditto.
> struct vu_dev *vdev = c->vdev;
> - struct vu_virtq *vq = &vdev->vq[VHOST_USER_RX_QUEUE];
> + struct vu_virtq *vq = &vdev->vq[rx_queue];
> size_t hdrlen = udp_vu_hdrlen(v6);
> int i;
>
> diff --git a/vhost_user.c b/vhost_user.c
> index a1259c2624c0..fa13c66d5f0d 100644
> --- a/vhost_user.c
> +++ b/vhost_user.c
> @@ -324,6 +324,7 @@ static bool vu_get_features_exec(struct vu_dev *vdev,
> 1ULL << VIRTIO_F_VERSION_1 |
> 1ULL << VIRTIO_NET_F_GUEST_CSUM |
> 1ULL << VIRTIO_NET_F_MRG_RXBUF |
> + 1ULL << VIRTIO_NET_F_MQ |
> 1ULL << VHOST_F_LOG_ALL |
> 1ULL << VHOST_USER_F_PROTOCOL_FEATURES;
>
> @@ -770,7 +771,8 @@ static void vu_check_queue_msg_file(struct vhost_user_msg *vmsg)
> bool nofd = vmsg->payload.u64 & VHOST_USER_VRING_NOFD_MASK;
>
> if (idx >= VHOST_USER_MAX_VQS)
> - die("Invalid vhost-user queue index: %u", idx);
> + die("Invalid vhost-user queue index: %u (maximum %u)", idx,
> + VHOST_USER_MAX_VQS);
Nit: the maximum index should be (VHOST_USER_MAX_VQS - 1), no?
>
> if (nofd) {
> vmsg_close_fds(vmsg);
> @@ -812,7 +814,9 @@ static bool vu_set_vring_kick_exec(struct vu_dev *vdev,
>
> vdev->vq[idx].started = true;
>
> - if (vdev->vq[idx].kick_fd != -1 && VHOST_USER_IS_QUEUE_TX(idx)) {
> + if (vdev->vq[idx].kick_fd != -1 &&
> + QPAIR_FROMGUEST_QUEUE(QPAIR_FROM_QUEUE(idx)) ==
> + (unsigned int)idx) {
This seems a very awkward way of testing if idx is odd. AFAICT
VHOST_USER_IS_QUEUE_TX() was already correct for multiple qpairs, why
remove it? (That said, renaming it to QUEUE_IS_FROMGUEST or something
would be nice).
> vu_set_watch(vdev, idx);
> debug("Waiting for kicks on fd: %d for vq: %d",
> vdev->vq[idx].kick_fd, idx);
> @@ -899,7 +903,8 @@ static bool vu_get_protocol_features_exec(struct vu_dev *vdev,
> uint64_t features = 1ULL << VHOST_USER_PROTOCOL_F_REPLY_ACK |
> 1ULL << VHOST_USER_PROTOCOL_F_LOG_SHMFD |
> 1ULL << VHOST_USER_PROTOCOL_F_DEVICE_STATE |
> - 1ULL << VHOST_USER_PROTOCOL_F_RARP;
> + 1ULL << VHOST_USER_PROTOCOL_F_RARP |
> + 1ULL << VHOST_USER_PROTOCOL_F_MQ;
>
> (void)vdev;
> vmsg_set_reply_u64(vmsg, features);
> @@ -938,10 +943,9 @@ static bool vu_get_queue_num_exec(struct vu_dev *vdev,
> {
> (void)vdev;
>
> - /* NOLINTNEXTLINE(misc-redundant-expression) */
> vmsg_set_reply_u64(vmsg, VHOST_USER_MAX_VQS / 2);
>
> - debug("VHOST_USER_MAX_VQS %u", VHOST_USER_MAX_VQS / 2);
> + debug("Using up to %u vhost-user queue pairs", VHOST_USER_MAX_VQS / 2);
>
> return true;
> }
> diff --git a/vhost_user.h b/vhost_user.h
> index e806a9e54e00..d2e51d3e86c3 100644
> --- a/vhost_user.h
> +++ b/vhost_user.h
> @@ -201,15 +201,6 @@ struct vhost_user_msg {
> } __attribute__ ((__packed__));
> #define VHOST_USER_HDR_SIZE sizeof(struct vhost_user_header)
>
> -/* index of the RX virtqueue */
> -#define VHOST_USER_RX_QUEUE 0
> -/* index of the TX virtqueue */
> -#define VHOST_USER_TX_QUEUE 1
> -
> -/* in case of multiqueue, the RX and TX queues are interleaved */
> -#define VHOST_USER_IS_QUEUE_TX(n) (n % 2)
> -#define VHOST_USER_IS_QUEUE_RX(n) (!(n % 2))
> -
> /* Default virtio-net header for passt */
> #define VU_HEADER ((struct virtio_net_hdr){ \
> .flags = VIRTIO_NET_HDR_F_DATA_VALID, \
> diff --git a/virtio.h b/virtio.h
> index 8f2ae068d5ba..eb7570e63cd7 100644
> --- a/virtio.h
> +++ b/virtio.h
> @@ -90,7 +90,7 @@ struct vu_dev_region {
> uint64_t mmap_addr;
> };
>
> -#define VHOST_USER_MAX_VQS 2U
> +#define VHOST_USER_MAX_VQS 32U
>
> /*
> * Set a reasonable maximum number of ram slots, which will be supported by
> diff --git a/vu_common.c b/vu_common.c
> index d07f584f228a..8b555ea9a8b1 100644
> --- a/vu_common.c
> +++ b/vu_common.c
> @@ -175,7 +175,8 @@ static void vu_handle_tx(struct vu_dev *vdev, int index,
> int out_sg_count;
> int count;
>
> - assert(VHOST_USER_IS_QUEUE_TX(index));
> + assert(QPAIR_FROMGUEST_QUEUE(QPAIR_FROM_QUEUE(index)) ==
> + (unsigned int)index);
Similar comment here.
>
> tap_flush_pools();
>
> @@ -233,28 +234,32 @@ void vu_kick_cb(struct vu_dev *vdev, union epoll_ref ref,
>
> trace("vhost-user: got kick_data: %016"PRIx64" idx: %d",
> kick_data, ref.queue);
> - if (VHOST_USER_IS_QUEUE_TX(ref.queue))
> + if (QPAIR_FROMGUEST_QUEUE(QPAIR_FROM_QUEUE(ref.queue)) ==
> + (unsigned int)ref.queue)
> vu_handle_tx(vdev, ref.queue, now);
And here.
> }
>
> /**
> - * vu_send_single() - Send a buffer to the front-end using the RX virtqueue
> - * @c: execution context
> + * vu_send_single() - Send a buffer to the front-end using a specified virtqueue
> + * @c: Execution context
> + * @qpair: Queue pair on which to send the buffer
> * @buf: address of the buffer
> * @size: size of the buffer
> *
> * Return: number of bytes sent, -1 if there is an error
> */
> -int vu_send_single(const struct ctx *c, const void *buf, size_t size)
> +int vu_send_single(const struct ctx *c, unsigned int qpair, const void *buf, size_t size)
> {
> struct vu_dev *vdev = c->vdev;
> - struct vu_virtq *vq = &vdev->vq[VHOST_USER_RX_QUEUE];
> struct vu_virtq_element elem[VIRTQUEUE_MAX_SIZE];
> struct iovec in_sg[VIRTQUEUE_MAX_SIZE];
> + struct vu_virtq *vq;
> size_t total, in_total;
> int elem_cnt;
> int i;
>
> + vq = &vdev->vq[QPAIR_TOGUEST_QUEUE(qpair)];
> +
> trace("vu_send_single size %zu", size);
>
> if (!vu_queue_enabled(vq) || !vu_queue_started(vq)) {
> diff --git a/vu_common.h b/vu_common.h
> index 817384175a1d..f5603d9ddeb6 100644
> --- a/vu_common.h
> +++ b/vu_common.h
> @@ -23,7 +23,8 @@ void vu_flush(const struct vu_dev *vdev, struct vu_virtq *vq,
> struct vu_virtq_element *elem, int elem_cnt, size_t frame_len);
> void vu_kick_cb(struct vu_dev *vdev, union epoll_ref ref,
> const struct timespec *now);
> -int vu_send_single(const struct ctx *c, const void *buf, size_t size);
> +int vu_send_single(const struct ctx *c, unsigned int qpair, const void *buf,
> + size_t size);
> void vu_pad(const struct iovec *iov, size_t cnt, size_t frame_len);
>
> #endif /* VU_COMMON_H */
> --
> 2.54.0
>
--
David Gibson (he or they) | I'll have my music baroque, and my code
david AT gibson.dropbear.id.au | minimalist, thank you, not the other way
| around.
http://www.ozlabs.org/~dgibson
[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]
next prev parent reply other threads:[~2026-06-19 5:30 UTC|newest]
Thread overview: 24+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-06-16 12:51 [PATCH v5 00/12] vhost-user: Add " Laurent Vivier
2026-06-16 12:51 ` [PATCH v5 01/12] tap: Remove pool parameter from tap4_handler() and tap6_handler() Laurent Vivier
2026-06-16 12:51 ` [PATCH v5 02/12] vhost-user: Advertise multiqueue support Laurent Vivier
2026-06-19 5:17 ` David Gibson [this message]
2026-06-16 12:51 ` [PATCH v5 03/12] test: Add multiqueue support to vhost-user test infrastructure Laurent Vivier
2026-06-19 5:06 ` David Gibson
2026-06-16 12:51 ` [PATCH v5 04/12] tap: Thread queue pair through all remaining tap paths Laurent Vivier
2026-06-19 5:37 ` David Gibson
2026-06-16 12:51 ` [PATCH v5 05/12] arp: Pass queue pair explicitly through ARP send path Laurent Vivier
2026-06-19 5:40 ` David Gibson
2026-06-16 12:51 ` [PATCH v5 06/12] tcp: Pass queue pair explicitly through TCP " Laurent Vivier
2026-06-19 6:00 ` David Gibson
2026-06-16 12:51 ` [PATCH v5 07/12] udp: Pass queue pair explicitly through UDP " Laurent Vivier
2026-06-19 6:08 ` David Gibson
2026-06-16 12:51 ` [PATCH v5 08/12] dhcp/dhcpv6: Pass queue pair explicitly through DHCP " Laurent Vivier
2026-06-19 6:10 ` David Gibson
2026-06-16 12:51 ` [PATCH v5 09/12] icmp: Pass queue pair explicitly through ICMP " Laurent Vivier
2026-06-19 6:12 ` David Gibson
2026-06-16 12:51 ` [PATCH v5 10/12] ndp: Pass queue pair explicitly through NDP " Laurent Vivier
2026-06-19 6:17 ` David Gibson
2026-06-16 12:51 ` [PATCH v5 11/12] flow: Add queue pair tracking to flow management Laurent Vivier
2026-06-19 6:36 ` David Gibson
2026-06-16 12:51 ` [PATCH v5 12/12] flow: Derive epoll fd from queue pair, removing epollid field Laurent Vivier
2026-06-19 6:52 ` David Gibson
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=ajTRAdY3qECRUN0A@zatzit \
--to=david@gibson.dropbear.id.au \
--cc=lvivier@redhat.com \
--cc=passt-dev@passt.top \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
Code repositories for project(s) associated with this public inbox
https://passt.top/passt
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for IMAP folder(s).