From: David Gibson <david@gibson.dropbear.id.au>
To: Laurent Vivier <lvivier@redhat.com>
Cc: passt-dev@passt.top
Subject: Re: [PATCH v5 06/12] tcp: Pass queue pair explicitly through TCP send path
Date: Fri, 19 Jun 2026 16:00:42 +1000 [thread overview]
Message-ID: <ajTbChyykMCYiOHm@zatzit> (raw)
In-Reply-To: <20260616125130.1324274-7-lvivier@redhat.com>
[-- Attachment #1: Type: text/plain, Size: 36905 bytes --]
On Tue, Jun 16, 2026 at 02:51:24PM +0200, Laurent Vivier wrote:
> Thread a qpair parameter from the entry points (tcp_sock_handler,
> tcp_timer_handler, tcp_tap_handler, tcp_defer_handler) through every
> intermediate function down to the vhost-user send functions, so callers
> explicitly select the target RX virtqueue instead of hardcoding
> QPAIR_DEFAULT.
>
> Add a qpair parameter to tcp_send_flag(), tcp_data_from_sock(),
> tcp_rst_do() and its tcp_rst() macro, tcp_rewind_seq(),
> tcp_data_from_tap(), tcp_conn_from_sock_finish(), tcp_connect_finish(),
> tcp_tap_window_update(), tcp_conn_from_tap(), tcp_rst_no_conn(),
> tcp_keepalive(), and tcp_inactivity().
For the to-guest functions which take a connection parameter, this
seems odd to me. Can't they deduce the right queue from the
connection?
It makes sense, obviously, for anything without a connection
parameter. And it makes sense to me for from-guest functions, since
the guest might have migrated the connection to a different queue
since last we heard about it.
> tcp_vu_send_flag() and tcp_vu_data_from_sock() now use the passed
> qpair to select the RX virtqueue instead of always using
> QPAIR_DEFAULT.
>
> The buffer-based path (tcp_buf.c) does not thread qpair since it is
> only used in non-vhost-user mode.
>
> No functional change.
>
> Signed-off-by: Laurent Vivier <lvivier@redhat.com>
> ---
> passt.c | 12 +--
> tap.c | 8 +-
> tcp.c | 199 ++++++++++++++++++++++++++++---------------------
> tcp.h | 15 ++--
> tcp_buf.c | 10 +--
> tcp_internal.h | 7 +-
> tcp_vu.c | 19 +++--
> tcp_vu.h | 6 +-
> 8 files changed, 157 insertions(+), 119 deletions(-)
>
> diff --git a/passt.c b/passt.c
> index 71eb4f0192e2..9569f920ee28 100644
> --- a/passt.c
> +++ b/passt.c
> @@ -98,11 +98,13 @@ struct passt_stats {
> * post_handler() - Run periodic and deferred tasks for L4 protocol handlers
> * @c: Execution context
> * @now: Current timestamp
> + * @qpair: Queue pair to process
> */
> -static void post_handler(struct ctx *c, const struct timespec *now)
> +static void post_handler(struct ctx *c, const struct timespec *now,
> + unsigned int qpair)
> {
> if (!c->no_tcp)
> - tcp_defer_handler(c, now);
> + tcp_defer_handler(c, now, qpair);
>
> flow_defer_handler(c, now);
> fwd_scan_ports_timer(c, now);
> @@ -251,7 +253,7 @@ static void passt_worker(void *opaque, int nfds, struct epoll_event *events)
> pasta_netns_quit_timer_handler(c, ref);
> break;
> case EPOLL_TYPE_TCP:
> - tcp_sock_handler(c, ref, eventmask);
> + tcp_sock_handler(c, ref, eventmask, QPAIR_DEFAULT);
> break;
> case EPOLL_TYPE_TCP_SPLICE:
> tcp_splice_sock_handler(c, ref, eventmask, &now);
> @@ -260,7 +262,7 @@ static void passt_worker(void *opaque, int nfds, struct epoll_event *events)
> tcp_listen_handler(c, ref, &now);
> break;
> case EPOLL_TYPE_TCP_TIMER:
> - tcp_timer_handler(c, ref);
> + tcp_timer_handler(c, ref, QPAIR_DEFAULT);
> break;
> case EPOLL_TYPE_UDP_LISTEN:
> udp_listen_sock_handler(c, ref, eventmask, &now);
> @@ -300,7 +302,7 @@ static void passt_worker(void *opaque, int nfds, struct epoll_event *events)
> print_stats(c, &stats, &now);
> }
>
> - post_handler(c, &now);
> + post_handler(c, &now, QPAIR_DEFAULT);
>
> migrate_handler(c);
> }
> diff --git a/tap.c b/tap.c
> index 66dcb83665a7..ba2a573fa630 100644
> --- a/tap.c
> +++ b/tap.c
> @@ -717,8 +717,6 @@ static int tap4_handler(struct ctx *c, unsigned int qpair,
> unsigned int i, j, seq_count;
> struct tap4_l4_t *seq;
>
> - (void)qpair;
> -
> if (!c->ifi4 || !pool_tap4->count)
> return pool_tap4->count;
>
> @@ -870,7 +868,7 @@ append:
> if (c->no_tcp)
> continue;
> for (k = 0; k < p->count; )
> - k += tcp_tap_handler(c, PIF_TAP, AF_INET,
> + k += tcp_tap_handler(c, qpair, PIF_TAP, AF_INET,
> &seq->saddr, &seq->daddr,
> 0, p, k, now);
> } else if (seq->protocol == IPPROTO_UDP) {
> @@ -955,8 +953,6 @@ static int tap6_handler(struct ctx *c, unsigned int qpair,
> unsigned int i, j, seq_count = 0;
> struct tap6_l4_t *seq;
>
> - (void)qpair;
> -
> if (!c->ifi6 || !pool_tap6->count)
> return pool_tap6->count;
>
> @@ -1121,7 +1117,7 @@ append:
> if (c->no_tcp)
> continue;
> for (k = 0; k < p->count; )
> - k += tcp_tap_handler(c, PIF_TAP, AF_INET6,
> + k += tcp_tap_handler(c, qpair, PIF_TAP, AF_INET6,
> &seq->saddr, &seq->daddr,
> seq->flow_lbl, p, k, now);
> } else if (seq->protocol == IPPROTO_UDP) {
> diff --git a/tcp.c b/tcp.c
> index c127b3132e5a..7f8e68a31994 100644
> --- a/tcp.c
> +++ b/tcp.c
> @@ -1258,16 +1258,18 @@ static void tcp_update_seqack_from_tap(const struct ctx *c,
> * tcp_rewind_seq() - Rewind sequence to tap and socket offset to current ACK
> * @c: Execution context
> * @conn: Connection pointer
> + * @qpair: Queue pair to process
> *
> * Return: 0 on success, -1 on failure, with connection reset
> */
> -static int tcp_rewind_seq(const struct ctx *c, struct tcp_tap_conn *conn)
> +static int tcp_rewind_seq(const struct ctx *c, struct tcp_tap_conn *conn,
> + unsigned int qpair)
> {
> conn->seq_to_tap = conn->seq_ack_from_tap;
> conn->events &= ~TAP_FIN_SENT;
>
> if (tcp_set_peek_offset(conn, 0)) {
> - tcp_rst(c, conn);
> + tcp_rst(c, conn, qpair);
> return -1;
> }
>
> @@ -1371,16 +1373,17 @@ int tcp_prepare_flags(const struct ctx *c, struct tcp_tap_conn *conn,
> * @c: Execution context
> * @conn: Connection pointer
> * @flags: TCP flags: if not set, send segment only if ACK is due
> + * @qpair: Queue pair to process
> *
> * Return: negative error code on fatal connection failure, 0 otherwise
> */
> static int tcp_send_flag(const struct ctx *c, struct tcp_tap_conn *conn,
> - int flags)
> + int flags, unsigned int qpair)
> {
> int ret;
>
> if (c->mode == MODE_VU)
> - ret = tcp_vu_send_flag(c, conn, flags);
> + ret = tcp_vu_send_flag(c, conn, flags, qpair);
> else
> ret = tcp_buf_send_flag(c, conn, flags);
>
> @@ -1425,14 +1428,16 @@ static void tcp_sock_rst(const struct ctx *c, struct tcp_tap_conn *conn)
> * tcp_rst_do() - Reset a tap connection: send RST segment on both sides, close
> * @c: Execution context
> * @conn: Connection pointer
> + * @qpair: Queue pair to process
> */
> -void tcp_rst_do(const struct ctx *c, struct tcp_tap_conn *conn)
> +void tcp_rst_do(const struct ctx *c, struct tcp_tap_conn *conn,
> + unsigned int qpair)
> {
> if (conn->events == CLOSED)
> return;
>
> /* Send RST on tap */
> - tcp_send_flag(c, conn, RST);
> + tcp_send_flag(c, conn, RST, qpair);
>
> tcp_sock_rst(c, conn);
> }
> @@ -1459,11 +1464,13 @@ static void tcp_get_tap_ws(struct tcp_tap_conn *conn,
> * @c: Execution context
> * @conn: Connection pointer
> * @wnd: Window value, host order, unscaled
> + * @qpair: Queue pair to process
> *
> * Return: false on zero window (not stored to wnd_from_tap), true otherwise
> */
> static bool tcp_tap_window_update(const struct ctx *c,
> - struct tcp_tap_conn *conn, unsigned wnd)
> + struct tcp_tap_conn *conn, unsigned wnd,
> + unsigned int qpair)
In particular, threading the qpair through this function, which is
chiefly not about sending _or_ receiving, seems odd..
> {
> wnd = MIN(MAX_WINDOW, wnd << conn->ws_from_tap);
>
> @@ -1474,7 +1481,7 @@ static bool tcp_tap_window_update(const struct ctx *c,
> * that no data beyond the updated window will be acknowledged.
> */
> if (!wnd && SEQ_LT(conn->seq_ack_from_tap, conn->seq_to_tap)) {
> - tcp_rewind_seq(c, conn);
> + tcp_rewind_seq(c, conn, qpair);
.. just to pass it here, which only cares about it for an error case.
> return false;
> }
>
> @@ -1646,6 +1653,7 @@ static void tcp_bind_outbound(const struct ctx *c,
> /**
> * tcp_conn_from_tap() - Handle connection request (SYN segment) from tap
> * @c: Execution context
> + * @qpair: Queue pair for the flow
> * @af: Address family, AF_INET or AF_INET6
> * @saddr: Source address, pointer to in_addr or in6_addr
> * @daddr: Destination address, pointer to in_addr or in6_addr
> @@ -1656,10 +1664,11 @@ static void tcp_bind_outbound(const struct ctx *c,
> *
> * #syscalls:vu getsockname
> */
> -static void tcp_conn_from_tap(const struct ctx *c, sa_family_t af,
> - const void *saddr, const void *daddr,
> - const struct tcphdr *th, const char *opts,
> - size_t optlen, const struct timespec *now)
> +static void tcp_conn_from_tap(const struct ctx *c, unsigned int qpair,
> + sa_family_t af, const void *saddr,
> + const void *daddr, const struct tcphdr *th,
> + const char *opts, size_t optlen,
> + const struct timespec *now)
> {
> in_port_t srcport = ntohs(th->source);
> in_port_t dstport = ntohs(th->dest);
> @@ -1760,7 +1769,7 @@ static void tcp_conn_from_tap(const struct ctx *c, sa_family_t af,
>
> if (connect(s, &sa.sa, socklen_inany(&sa))) {
> if (errno != EINPROGRESS) {
> - tcp_rst(c, conn);
> + tcp_rst(c, conn, qpair);
> goto cancel;
> }
>
> @@ -1768,7 +1777,7 @@ static void tcp_conn_from_tap(const struct ctx *c, sa_family_t af,
> } else {
> tcp_get_sndbuf(conn);
>
> - if (tcp_send_flag(c, conn, SYN | ACK))
> + if (tcp_send_flag(c, conn, SYN | ACK, qpair))
> goto cancel;
>
> conn_event(c, conn, TAP_SYN_ACK_SENT);
> @@ -1830,15 +1839,17 @@ static int tcp_sock_consume(const struct tcp_tap_conn *conn, uint32_t ack_seq)
> * tcp_data_from_sock() - Handle new data from socket, queue to tap, in window
> * @c: Execution context
> * @conn: Connection pointer
> + * @qpair: Queue pair to process
> *
> * Return: negative on connection reset, 0 otherwise
> *
> * #syscalls recvmsg
> */
> -static int tcp_data_from_sock(const struct ctx *c, struct tcp_tap_conn *conn)
> +static int tcp_data_from_sock(const struct ctx *c, struct tcp_tap_conn *conn,
> + unsigned int qpair)
> {
> if (c->mode == MODE_VU)
> - return tcp_vu_data_from_sock(c, conn);
> + return tcp_vu_data_from_sock(c, conn, qpair);
>
> return tcp_buf_data_from_sock(c, conn);
> }
> @@ -1866,13 +1877,15 @@ static ssize_t tcp_packet_data_len(const struct tcphdr *th, size_t l4len)
> * @conn: Connection pointer
> * @p: Pool of TCP packets, with TCP headers
> * @idx: Index of first data packet in pool
> + * @qpair: Queue pair to process
> *
> * #syscalls sendmsg
> *
> * Return: count of consumed packets
> */
> static int tcp_data_from_tap(const struct ctx *c, struct tcp_tap_conn *conn,
> - const struct pool *p, int idx)
> + const struct pool *p, int idx,
> + unsigned int qpair)
> {
> int i, iov_i, ack = 0, fin = 0, retr = 0, keep = -1, partial_send = 0;
> uint16_t max_ack_seq_wnd = conn->wnd_from_tap;
> @@ -1922,7 +1935,7 @@ static int tcp_data_from_tap(const struct ctx *c, struct tcp_tap_conn *conn,
> "keep-alive sequence: %u, previous: %u",
> seq, conn->seq_from_tap);
>
> - if (tcp_send_flag(c, conn, ACK))
> + if (tcp_send_flag(c, conn, ACK, qpair))
> return -1;
>
> tcp_timer_ctl(c, conn);
> @@ -1933,7 +1946,8 @@ static int tcp_data_from_tap(const struct ctx *c, struct tcp_tap_conn *conn,
>
> if (p->count == 1) {
> tcp_tap_window_update(c, conn,
> - ntohs(th->window));
> + ntohs(th->window),
> + qpair);
> return 1;
> }
>
> @@ -1959,7 +1973,7 @@ static int tcp_data_from_tap(const struct ctx *c, struct tcp_tap_conn *conn,
> * well.
> */
> if (!ntohs(th->window))
> - tcp_rewind_seq(c, conn);
> + tcp_rewind_seq(c, conn, qpair);
>
> max_ack_seq_wnd = ntohs(th->window);
> max_ack_seq = ack_seq;
> @@ -2024,17 +2038,17 @@ static int tcp_data_from_tap(const struct ctx *c, struct tcp_tap_conn *conn,
> if (ack && !tcp_sock_consume(conn, max_ack_seq))
> tcp_update_seqack_from_tap(c, conn, max_ack_seq);
>
> - tcp_tap_window_update(c, conn, max_ack_seq_wnd);
> + tcp_tap_window_update(c, conn, max_ack_seq_wnd, qpair);
>
> if (retr) {
> flow_trace(conn,
> "fast re-transmit, ACK: %u, previous sequence: %u",
> conn->seq_ack_from_tap, conn->seq_to_tap);
>
> - if (tcp_rewind_seq(c, conn))
> + if (tcp_rewind_seq(c, conn, qpair))
> return -1;
>
> - tcp_data_from_sock(c, conn);
> + tcp_data_from_sock(c, conn, qpair);
> }
>
> if (!iov_i)
> @@ -2050,7 +2064,7 @@ eintr:
> * Then swiftly looked away and left.
> */
> conn->seq_from_tap = seq_from_tap;
> - if (tcp_send_flag(c, conn, ACK))
> + if (tcp_send_flag(c, conn, ACK, qpair))
> return -1;
> }
>
> @@ -2058,7 +2072,7 @@ eintr:
> goto eintr;
>
> if (errno == EAGAIN || errno == EWOULDBLOCK) {
> - if (tcp_send_flag(c, conn, ACK | DUP_ACK))
> + if (tcp_send_flag(c, conn, ACK | DUP_ACK, qpair))
> return -1;
>
> uint32_t events = tcp_conn_epoll_events(conn->events,
> @@ -2094,7 +2108,7 @@ out:
> */
> if (conn->seq_dup_ack_approx != (conn->seq_from_tap & 0xff)) {
> conn->seq_dup_ack_approx = conn->seq_from_tap & 0xff;
> - if (tcp_send_flag(c, conn, ACK | DUP_ACK))
> + if (tcp_send_flag(c, conn, ACK | DUP_ACK, qpair))
> return -1;
> }
> return p->count - idx;
> @@ -2109,7 +2123,7 @@ out:
>
> conn_event(c, conn, TAP_FIN_RCVD);
> } else {
> - if (tcp_send_flag(c, conn, ACK_IF_NEEDED))
> + if (tcp_send_flag(c, conn, ACK_IF_NEEDED, qpair))
> return -1;
> }
>
> @@ -2123,13 +2137,15 @@ out:
> * @th: TCP header of SYN, ACK segment: caller MUST ensure it's there
> * @opts: Pointer to start of options
> * @optlen: Bytes in options: caller MUST ensure available length
> + * @qpair: Queue pair to process
> */
> static void tcp_conn_from_sock_finish(const struct ctx *c,
> struct tcp_tap_conn *conn,
> const struct tcphdr *th,
> - const char *opts, size_t optlen)
> + const char *opts, size_t optlen,
> + unsigned int qpair)
> {
> - tcp_tap_window_update(c, conn, ntohs(th->window));
> + tcp_tap_window_update(c, conn, ntohs(th->window), qpair);
> tcp_get_tap_ws(conn, opts, optlen);
>
> /* First value is not scaled */
> @@ -2144,24 +2160,25 @@ static void tcp_conn_from_sock_finish(const struct ctx *c,
>
> conn_event(c, conn, ESTABLISHED);
> if (tcp_set_peek_offset(conn, 0)) {
> - tcp_rst(c, conn);
> + tcp_rst(c, conn, qpair);
> return;
> }
>
> - if (tcp_send_flag(c, conn, ACK)) {
> - tcp_rst(c, conn);
> + if (tcp_send_flag(c, conn, ACK, qpair)) {
> + tcp_rst(c, conn, qpair);
> return;
> }
>
> /* The client might have sent data already, which we didn't
> * dequeue waiting for SYN,ACK from tap -- check now.
> */
> - tcp_data_from_sock(c, conn);
> + tcp_data_from_sock(c, conn, qpair);
> }
>
> /**
> * tcp_rst_no_conn() - Send RST in response to a packet with no connection
> * @c: Execution context
> + * @qpair: Queue pair on which to send the reply
> * @af: Address family, AF_INET or AF_INET6
> * @saddr: Source address of the packet we're responding to
> * @daddr: Destination address of the packet we're responding to
> @@ -2169,7 +2186,7 @@ static void tcp_conn_from_sock_finish(const struct ctx *c,
> * @th: TCP header of the packet we're responding to
> * @l4len: Packet length, including TCP header
> */
> -static void tcp_rst_no_conn(const struct ctx *c, int af,
> +static void tcp_rst_no_conn(const struct ctx *c, unsigned int qpair, int af,
> const void *saddr, const void *daddr,
> uint32_t flow_lbl,
> const struct tcphdr *th, size_t l4len)
> @@ -2227,12 +2244,13 @@ static void tcp_rst_no_conn(const struct ctx *c, int af,
>
> tcp_update_csum(psum, rsth, &payload, 0);
> rst_l2len = ((char *)rsth - buf) + sizeof(*rsth);
> - tap_send_single(c, QPAIR_DEFAULT, buf, rst_l2len);
> + tap_send_single(c, qpair, buf, rst_l2len);
> }
>
> /**
> * tcp_tap_handler() - Handle packets from tap and state transitions
> * @c: Execution context
> + * @qpair: Queue pair on which to send packets
> * @pif: pif on which the packet is arriving
> * @af: Address family, AF_INET or AF_INET6
> * @saddr: Source address
> @@ -2244,9 +2262,10 @@ static void tcp_rst_no_conn(const struct ctx *c, int af,
> *
> * Return: count of consumed packets
> */
> -int tcp_tap_handler(const struct ctx *c, uint8_t pif, sa_family_t af,
> - const void *saddr, const void *daddr, uint32_t flow_lbl,
> - const struct pool *p, int idx, const struct timespec *now)
> +int tcp_tap_handler(const struct ctx *c, unsigned int qpair, uint8_t pif,
> + sa_family_t af, const void *saddr, const void *daddr,
> + uint32_t flow_lbl, const struct pool *p, int idx,
> + const struct timespec *now)
> {
> struct tcp_tap_conn *conn;
> struct tcphdr th_storage;
> @@ -2283,10 +2302,11 @@ int tcp_tap_handler(const struct ctx *c, uint8_t pif, sa_family_t af,
> /* New connection from tap */
> if (!flow) {
> if (opts && th->syn && !th->ack)
> - tcp_conn_from_tap(c, af, saddr, daddr, th,
> + tcp_conn_from_tap(c, qpair, af, saddr, daddr, th,
> opts, optlen, now);
> else
> - tcp_rst_no_conn(c, af, saddr, daddr, flow_lbl, th, l4len);
> + tcp_rst_no_conn(c, qpair, af, saddr, daddr, flow_lbl, th,
> + l4len);
> return 1;
> }
>
> @@ -2310,7 +2330,7 @@ int tcp_tap_handler(const struct ctx *c, uint8_t pif, sa_family_t af,
> /* Establishing connection from socket */
> if (conn->events & SOCK_ACCEPTED) {
> if (th->syn && th->ack && !th->fin) {
> - tcp_conn_from_sock_finish(c, conn, th, opts, optlen);
> + tcp_conn_from_sock_finish(c, conn, th, opts, optlen, qpair);
> return 1;
> }
>
> @@ -2337,7 +2357,7 @@ int tcp_tap_handler(const struct ctx *c, uint8_t pif, sa_family_t af,
> goto reset;
> }
>
> - if (tcp_send_flag(c, conn, ACK))
> + if (tcp_send_flag(c, conn, ACK, qpair))
> goto reset;
>
> conn_event(c, conn, SOCK_FIN_SENT);
> @@ -2348,8 +2368,8 @@ int tcp_tap_handler(const struct ctx *c, uint8_t pif, sa_family_t af,
> if (!th->ack)
> goto reset;
>
> - if (tcp_tap_window_update(c, conn, ntohs(th->window)))
> - tcp_data_from_sock(c, conn);
> + if (tcp_tap_window_update(c, conn, ntohs(th->window), qpair))
> + tcp_data_from_sock(c, conn, qpair);
>
> if (p->count - idx == 1)
> return 1;
> @@ -2380,12 +2400,12 @@ int tcp_tap_handler(const struct ctx *c, uint8_t pif, sa_family_t af,
> "fast re-transmit, ACK: %u, previous sequence: %u",
> ntohl(th->ack_seq), conn->seq_to_tap);
>
> - if (tcp_rewind_seq(c, conn))
> + if (tcp_rewind_seq(c, conn, qpair))
> return -1;
> }
>
> - if (tcp_tap_window_update(c, conn, ntohs(th->window)) || retr)
> - tcp_data_from_sock(c, conn);
> + if (tcp_tap_window_update(c, conn, ntohs(th->window), qpair) || retr)
> + tcp_data_from_sock(c, conn, qpair);
>
> if (conn->seq_ack_from_tap == conn->seq_to_tap) {
> if (th->ack && conn->events & TAP_FIN_SENT)
> @@ -2400,7 +2420,7 @@ int tcp_tap_handler(const struct ctx *c, uint8_t pif, sa_family_t af,
> }
>
> /* Established connections accepting data from tap */
> - count = tcp_data_from_tap(c, conn, p, idx);
> + count = tcp_data_from_tap(c, conn, p, idx, qpair);
> if (count == -1)
> goto reset;
>
> @@ -2419,7 +2439,7 @@ int tcp_tap_handler(const struct ctx *c, uint8_t pif, sa_family_t af,
> }
>
> conn_event(c, conn, SOCK_FIN_SENT);
> - if (tcp_send_flag(c, conn, ACK))
> + if (tcp_send_flag(c, conn, ACK, qpair))
> goto reset;
>
> ack_due = 0;
> @@ -2449,7 +2469,7 @@ reset:
> * remaining packets in the batch, since they'd be invalidated when our
> * RST is received, even if otherwise good.
> */
> - tcp_rst(c, conn);
> + tcp_rst(c, conn, qpair);
> return p->count - idx;
> }
>
> @@ -2457,20 +2477,22 @@ reset:
> * tcp_connect_finish() - Handle completion of connect() from EPOLLOUT event
> * @c: Execution context
> * @conn: Connection pointer
> + * @qpair: Queue pair to process
> */
> -static void tcp_connect_finish(const struct ctx *c, struct tcp_tap_conn *conn)
> +static void tcp_connect_finish(const struct ctx *c, struct tcp_tap_conn *conn,
> + unsigned int qpair)
> {
> socklen_t sl;
> int so;
>
> sl = sizeof(so);
> if (getsockopt(conn->sock, SOL_SOCKET, SO_ERROR, &so, &sl) || so) {
> - tcp_rst(c, conn);
> + tcp_rst(c, conn, qpair);
> return;
> }
>
> - if (tcp_send_flag(c, conn, SYN | ACK)) {
> - tcp_rst(c, conn);
> + if (tcp_send_flag(c, conn, SYN | ACK, qpair)) {
> + tcp_rst(c, conn, qpair);
> return;
> }
>
> @@ -2513,7 +2535,7 @@ static void tcp_tap_conn_from_sock(const struct ctx *c, union flow *flow,
>
> conn->wnd_from_tap = WINDOW_DEFAULT;
>
> - if (tcp_send_flag(c, conn, SYN)) {
> + if (tcp_send_flag(c, conn, SYN, QPAIR_DEFAULT)) {
> conn_flag(c, conn, CLOSING);
> FLOW_ACTIVATE(conn);
> return;
> @@ -2604,12 +2626,14 @@ cancel:
> * tcp_timer_handler() - timerfd events: close, send ACK, retransmit, or reset
> * @c: Execution context
> * @ref: epoll reference of timer (not connection)
> + * @qpair: Queue pair to process
> *
> * #syscalls timerfd_gettime|timerfd_gettime64
> * #syscalls arm:timerfd_gettime64 i686:timerfd_gettime64
> * #syscalls arm:timerfd_settime64 i686:timerfd_settime64
> */
> -void tcp_timer_handler(const struct ctx *c, union epoll_ref ref)
> +void tcp_timer_handler(const struct ctx *c, union epoll_ref ref,
> + unsigned int qpair)
This one seems even stranger to me. This is triggered off a timer,
not a tap side event, so how would we even know the relevant qpair
except by looking at the connection?
> {
> struct itimerspec check_armed = { { 0 }, { 0 } };
> struct tcp_tap_conn *conn = &FLOW(ref.flow)->tcp;
> @@ -2628,8 +2652,8 @@ void tcp_timer_handler(const struct ctx *c, union epoll_ref ref)
> return;
>
> if (conn->flags & ACK_TO_TAP_DUE) {
> - if (tcp_send_flag(c, conn, ACK_IF_NEEDED)) {
> - tcp_rst(c, conn);
> + if (tcp_send_flag(c, conn, ACK_IF_NEEDED, qpair)) {
> + tcp_rst(c, conn, qpair);
> return;
> }
> tcp_timer_ctl(c, conn);
> @@ -2641,11 +2665,11 @@ void tcp_timer_handler(const struct ctx *c, union epoll_ref ref)
> max = MIN(TCP_MAX_RETRIES, max);
> if (conn->retries >= max) {
> flow_dbg(conn, "handshake timeout");
> - tcp_rst(c, conn);
> + tcp_rst(c, conn, qpair);
> } else {
> flow_trace(conn, "SYN timeout, retry");
> - if (tcp_send_flag(c, conn, SYN)) {
> - tcp_rst(c, conn);
> + if (tcp_send_flag(c, conn, SYN, qpair)) {
> + tcp_rst(c, conn, qpair);
> return;
> }
> conn->retries++;
> @@ -2654,7 +2678,7 @@ void tcp_timer_handler(const struct ctx *c, union epoll_ref ref)
> }
> } else if (conn->retries == TCP_MAX_RETRIES) {
> flow_dbg(conn, "retransmissions count exceeded");
> - tcp_rst(c, conn);
> + tcp_rst(c, conn, qpair);
> } else {
> flow_dbg(conn, "ACK timeout, retry");
>
> @@ -2662,10 +2686,10 @@ void tcp_timer_handler(const struct ctx *c, union epoll_ref ref)
> conn->wnd_from_tap = 1; /* Zero-window probe */
>
> conn->retries++;
> - if (tcp_rewind_seq(c, conn))
> + if (tcp_rewind_seq(c, conn, qpair))
> return;
>
> - tcp_data_from_sock(c, conn);
> + tcp_data_from_sock(c, conn, qpair);
> tcp_timer_ctl(c, conn);
> }
> }
> @@ -2676,9 +2700,10 @@ void tcp_timer_handler(const struct ctx *c, union epoll_ref ref)
> * @c: Execution context
> * @ref: epoll reference
> * @events: epoll events bitmap
> + * @qpair: Queue pair to process
> */
> void tcp_sock_handler(const struct ctx *c, union epoll_ref ref,
> - uint32_t events)
> + uint32_t events, unsigned int qpair)
Likewise here, how could the caller know a reasonable qpair based just
on the socket side event?
> {
> struct tcp_tap_conn *conn = conn_at_sidx(ref.flowside);
>
> @@ -2689,7 +2714,7 @@ void tcp_sock_handler(const struct ctx *c, union epoll_ref ref,
> return;
>
> if (events & EPOLLERR) {
> - tcp_rst(c, conn);
> + tcp_rst(c, conn, qpair);
> return;
> }
>
> @@ -2708,13 +2733,13 @@ void tcp_sock_handler(const struct ctx *c, union epoll_ref ref,
> conn_event(c, conn, SOCK_FIN_RCVD);
>
> if (events & EPOLLIN)
> - tcp_data_from_sock(c, conn);
> + tcp_data_from_sock(c, conn, qpair);
>
> if (events & EPOLLOUT) {
> tcp_epoll_ctl(conn);
> if (tcp_update_seqack_wnd(c, conn, false, NULL) &&
> - tcp_send_flag(c, conn, ACK)) {
> - tcp_rst(c, conn);
> + tcp_send_flag(c, conn, ACK, qpair)) {
> + tcp_rst(c, conn, qpair);
> return;
> }
> }
> @@ -2724,7 +2749,7 @@ void tcp_sock_handler(const struct ctx *c, union epoll_ref ref,
>
> /* EPOLLHUP during handshake: reset */
> if (events & EPOLLHUP) {
> - tcp_rst(c, conn);
> + tcp_rst(c, conn, qpair);
> return;
> }
>
> @@ -2734,7 +2759,7 @@ void tcp_sock_handler(const struct ctx *c, union epoll_ref ref,
>
> if (conn->events == TAP_SYN_RCVD) {
> if (events & EPOLLOUT)
> - tcp_connect_finish(c, conn);
> + tcp_connect_finish(c, conn, qpair);
> /* Data? Check later */
> }
> }
> @@ -2939,9 +2964,11 @@ int tcp_init(struct ctx *c)
>
> /**
> * tcp_keepalive() - Send keepalives for connections which need it
> - * @: Execution context
> + * @c: Execution context
> + * @qpair: Queue pair to process
> */
> -static void tcp_keepalive(struct ctx *c, const struct timespec *now)
> +static void tcp_keepalive(struct ctx *c, const struct timespec *now,
> + unsigned int qpair)
Here we're scanning the flow table, and each connection there could
be associated with a different queue, so again, this parameter doesn't
make sense to me.
> {
> union flow *flow;
>
> @@ -2956,8 +2983,8 @@ static void tcp_keepalive(struct ctx *c, const struct timespec *now)
> if (conn->tap_inactive) {
> flow_dbg(conn, "No tap activity for least %us, send keepalive",
> KEEPALIVE_INTERVAL);
> - if (tcp_send_flag(c, conn, KEEPALIVE))
> - tcp_rst(c, conn);
> + if (tcp_send_flag(c, conn, KEEPALIVE, qpair))
> + tcp_rst(c, conn, qpair);
> }
>
> /* Ready to check fot next interval */
> @@ -2967,9 +2994,11 @@ static void tcp_keepalive(struct ctx *c, const struct timespec *now)
>
> /**
> * tcp_inactivity() - Scan for and close long-inactive connections
> - * @: Execution context
> + * @c: Execution context
> + * @qpair: Queue pair to process
> */
> -static void tcp_inactivity(struct ctx *c, const struct timespec *now)
> +static void tcp_inactivity(struct ctx *c, const struct timespec *now,
> + unsigned int qpair)
Same here.
> {
> union flow *flow;
>
> @@ -2986,7 +3015,7 @@ static void tcp_inactivity(struct ctx *c, const struct timespec *now)
> /* No activity in this interval, reset */
> flow_dbg(conn, "Inactive for at least %us, resetting",
> INACTIVITY_INTERVAL);
> - tcp_rst(c, conn);
> + tcp_rst(c, conn, qpair);
> }
>
> /* Ready to check fot next interval */
> @@ -2998,9 +3027,11 @@ static void tcp_inactivity(struct ctx *c, const struct timespec *now)
> * tcp_defer_handler() - Handler for TCP deferred tasks
> * @c: Execution context
> * @now: Current timestamp
> + * @qpair: Queue pair to process
> */
> /* cppcheck-suppress [constParameterPointer, unmatchedSuppression] */
> -void tcp_defer_handler(struct ctx *c, const struct timespec *now)
> +void tcp_defer_handler(struct ctx *c, const struct timespec *now,
> + unsigned int qpair)
> {
> tcp_payload_flush(c);
>
> @@ -3013,8 +3044,8 @@ void tcp_defer_handler(struct ctx *c, const struct timespec *now)
> if (c->mode == MODE_PASTA)
> tcp_splice_refill(c);
>
> - tcp_keepalive(c, now);
> - tcp_inactivity(c, now);
> + tcp_keepalive(c, now, qpair);
> + tcp_inactivity(c, now, qpair);
> }
>
> /**
> @@ -3988,10 +4019,10 @@ int tcp_flow_migrate_target_ext(struct ctx *c, struct tcp_tap_conn *conn, int fd
> if (tcp_set_peek_offset(conn, peek_offset))
> goto fail;
>
> - if (tcp_send_flag(c, conn, ACK))
> + if (tcp_send_flag(c, conn, ACK, QPAIR_DEFAULT))
> goto fail;
>
> - tcp_data_from_sock(c, conn);
> + tcp_data_from_sock(c, conn, QPAIR_DEFAULT);
>
> if ((rc = tcp_epoll_ctl(conn))) {
> flow_dbg(conn,
> @@ -4009,7 +4040,7 @@ fail:
> }
>
> conn->flags = 0; /* Not waiting for ACK, don't schedule timer */
> - tcp_rst(c, conn);
> + tcp_rst(c, conn, QPAIR_DEFAULT);
>
> return 0;
> }
> diff --git a/tcp.h b/tcp.h
> index 3262a807e5d4..490f1b140e44 100644
> --- a/tcp.h
> +++ b/tcp.h
> @@ -18,18 +18,21 @@
>
> struct ctx;
>
> -void tcp_timer_handler(const struct ctx *c, union epoll_ref ref);
> +void tcp_timer_handler(const struct ctx *c, union epoll_ref ref,
> + unsigned int qpair);
> void tcp_listen_handler(const struct ctx *c, union epoll_ref ref,
> const struct timespec *now);
> void tcp_sock_handler(const struct ctx *c, union epoll_ref ref,
> - uint32_t events);
> -int tcp_tap_handler(const struct ctx *c, uint8_t pif, sa_family_t af,
> - const void *saddr, const void *daddr, uint32_t flow_lbl,
> - const struct pool *p, int idx, const struct timespec *now);
> + uint32_t events, unsigned int qpair);
> +int tcp_tap_handler(const struct ctx *c, unsigned int qpair, uint8_t pif,
> + sa_family_t af, const void *saddr, const void *daddr,
> + uint32_t flow_lbl, const struct pool *p, int idx,
> + const struct timespec *now);
> int tcp_listen(const struct ctx *c, uint8_t pif, unsigned rule,
> const union inany_addr *addr, const char *ifname, in_port_t port);
> int tcp_init(struct ctx *c);
> -void tcp_defer_handler(struct ctx *c, const struct timespec *now);
> +void tcp_defer_handler(struct ctx *c, const struct timespec *now,
> + unsigned int qpair);
>
> void tcp_update_l2_buf(const unsigned char *eth_d);
>
> diff --git a/tcp_buf.c b/tcp_buf.c
> index ca356089dc0b..ae8bebca5107 100644
> --- a/tcp_buf.c
> +++ b/tcp_buf.c
> @@ -124,7 +124,7 @@ static void tcp_revert_seq(const struct ctx *c, struct tcp_tap_conn **conns,
> conn->seq_to_tap = seq;
> peek_offset = conn->seq_to_tap - conn->seq_ack_from_tap;
> if (tcp_set_peek_offset(conn, peek_offset))
> - tcp_rst(c, conn);
> + tcp_rst(c, conn, QPAIR_DEFAULT);
> }
> }
>
> @@ -334,7 +334,7 @@ int tcp_buf_data_from_sock(const struct ctx *c, struct tcp_tap_conn *conn)
> conn->seq_to_tap = conn->seq_ack_from_tap;
> already_sent = 0;
> if (tcp_set_peek_offset(conn, 0)) {
> - tcp_rst(c, conn);
> + tcp_rst(c, conn, QPAIR_DEFAULT);
> return -1;
> }
> }
> @@ -356,7 +356,7 @@ int tcp_buf_data_from_sock(const struct ctx *c, struct tcp_tap_conn *conn)
> }
>
> if (tcp_prepare_iov(&mh_sock, iov_sock, already_sent, fill_bufs)) {
> - tcp_rst(c, conn);
> + tcp_rst(c, conn, QPAIR_DEFAULT);
> return -1;
> }
>
> @@ -381,7 +381,7 @@ int tcp_buf_data_from_sock(const struct ctx *c, struct tcp_tap_conn *conn)
>
> if (len < 0) {
> if (errno != EAGAIN && errno != EWOULDBLOCK) {
> - tcp_rst(c, conn);
> + tcp_rst(c, conn, QPAIR_DEFAULT);
> return -errno;
> }
>
> @@ -410,7 +410,7 @@ int tcp_buf_data_from_sock(const struct ctx *c, struct tcp_tap_conn *conn)
>
> ret = tcp_buf_send_flag(c, conn, FIN | ACK);
> if (ret) {
> - tcp_rst(c, conn);
> + tcp_rst(c, conn, QPAIR_DEFAULT);
> return ret;
> }
>
> diff --git a/tcp_internal.h b/tcp_internal.h
> index 40472c9973c8..22f8825adccc 100644
> --- a/tcp_internal.h
> +++ b/tcp_internal.h
> @@ -174,11 +174,12 @@ void conn_event_do(const struct ctx *c, struct tcp_tap_conn *conn,
> conn_event_do(c, conn, event); \
> } while (0)
>
> -void tcp_rst_do(const struct ctx *c, struct tcp_tap_conn *conn);
> -#define tcp_rst(c, conn) \
> +void tcp_rst_do(const struct ctx *c, struct tcp_tap_conn *conn,
> + unsigned int qpair);
> +#define tcp_rst(c, conn, qpair) \
> do { \
> flow_dbg((conn), "TCP reset at %s:%i", __func__, __LINE__); \
> - tcp_rst_do(c, conn); \
> + tcp_rst_do(c, conn, qpair); \
> } while (0)
>
> struct tcp_info_linux;
> diff --git a/tcp_vu.c b/tcp_vu.c
> index 9ef6b5242c9c..4f76f599156f 100644
> --- a/tcp_vu.c
> +++ b/tcp_vu.c
> @@ -116,15 +116,17 @@ static int tcp_vu_send_dup(const struct ctx *c, struct vu_virtq *vq,
> * @c: Execution context
> * @conn: Connection pointer
> * @flags: TCP flags: if not set, send segment only if ACK is due
> + * @qpair: Queue pair to process
> *
> * Return: -ECONNRESET on fatal connection error,
> * -EAGAIN if vhost-user buffers are unavailable,
> * 0 otherwise
> */
> -int tcp_vu_send_flag(const struct ctx *c, struct tcp_tap_conn *conn, int flags)
> +int tcp_vu_send_flag(const struct ctx *c, struct tcp_tap_conn *conn, int flags,
> + unsigned int qpair)
> {
> struct vu_dev *vdev = c->vdev;
> - int rx_queue = QPAIR_TOGUEST_QUEUE(QPAIR_DEFAULT);
> + int rx_queue = QPAIR_TOGUEST_QUEUE(qpair);
> struct vu_virtq *vq = &vdev->vq[rx_queue];
> size_t optlen, hdrlen, iov_cnt, iov_used;
> struct vu_virtq_element flags_elem[2];
> @@ -424,13 +426,14 @@ static void tcp_vu_prepare(const struct ctx *c, struct tcp_tap_conn *conn,
> * in window
> * @c: Execution context
> * @conn: Connection pointer
> + * @qpair: Queue pair to process
> *
> * Return: negative on connection reset, 0 otherwise
> */
> -int tcp_vu_data_from_sock(const struct ctx *c, struct tcp_tap_conn *conn)
> +int tcp_vu_data_from_sock(const struct ctx *c, struct tcp_tap_conn *conn,
> + unsigned int qpair)
> {
> uint32_t wnd_scaled = conn->wnd_from_tap << conn->ws_from_tap;
> - unsigned int qpair = QPAIR_DEFAULT;
> int rx_queue = QPAIR_TOGUEST_QUEUE(qpair);
> struct vu_dev *vdev = c->vdev;
> struct vu_virtq *vq = &vdev->vq[rx_queue];
> @@ -454,7 +457,7 @@ int tcp_vu_data_from_sock(const struct ctx *c, struct tcp_tap_conn *conn)
> conn->seq_to_tap = conn->seq_ack_from_tap;
> already_sent = 0;
> if (tcp_set_peek_offset(conn, 0)) {
> - tcp_rst(c, conn);
> + tcp_rst(c, conn, qpair);
> return -1;
> }
> }
> @@ -477,7 +480,7 @@ int tcp_vu_data_from_sock(const struct ctx *c, struct tcp_tap_conn *conn)
> &elem_cnt, &frame_cnt);
> if (len < 0) {
> if (len != -EAGAIN && len != -EWOULDBLOCK) {
> - tcp_rst(c, conn);
> + tcp_rst(c, conn, qpair);
> return len;
> }
>
> @@ -498,9 +501,9 @@ int tcp_vu_data_from_sock(const struct ctx *c, struct tcp_tap_conn *conn)
> /* See tcp_buf_data_from_sock() */
> conn->seq_ack_to_tap = conn->seq_from_tap;
>
> - ret = tcp_vu_send_flag(c, conn, FIN | ACK);
> + ret = tcp_vu_send_flag(c, conn, FIN | ACK, qpair);
> if (ret) {
> - tcp_rst(c, conn);
> + tcp_rst(c, conn, qpair);
> return ret;
> }
>
> diff --git a/tcp_vu.h b/tcp_vu.h
> index 6ab6057f352a..ae48420f4906 100644
> --- a/tcp_vu.h
> +++ b/tcp_vu.h
> @@ -6,7 +6,9 @@
> #ifndef TCP_VU_H
> #define TCP_VU_H
>
> -int tcp_vu_send_flag(const struct ctx *c, struct tcp_tap_conn *conn, int flags);
> -int tcp_vu_data_from_sock(const struct ctx *c, struct tcp_tap_conn *conn);
> +int tcp_vu_send_flag(const struct ctx *c, struct tcp_tap_conn *conn, int flags,
> + unsigned int qpair);
> +int tcp_vu_data_from_sock(const struct ctx *c, struct tcp_tap_conn *conn,
> + unsigned int qpair);
>
> #endif /*TCP_VU_H */
> --
> 2.54.0
>
--
David Gibson (he or they) | I'll have my music baroque, and my code
david AT gibson.dropbear.id.au | minimalist, thank you, not the other way
| around.
http://www.ozlabs.org/~dgibson
[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]
next prev parent reply other threads:[~2026-06-19 6:00 UTC|newest]
Thread overview: 24+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-06-16 12:51 [PATCH v5 00/12] vhost-user: Add multiqueue support Laurent Vivier
2026-06-16 12:51 ` [PATCH v5 01/12] tap: Remove pool parameter from tap4_handler() and tap6_handler() Laurent Vivier
2026-06-16 12:51 ` [PATCH v5 02/12] vhost-user: Advertise multiqueue support Laurent Vivier
2026-06-19 5:17 ` David Gibson
2026-06-16 12:51 ` [PATCH v5 03/12] test: Add multiqueue support to vhost-user test infrastructure Laurent Vivier
2026-06-19 5:06 ` David Gibson
2026-06-16 12:51 ` [PATCH v5 04/12] tap: Thread queue pair through all remaining tap paths Laurent Vivier
2026-06-19 5:37 ` David Gibson
2026-06-16 12:51 ` [PATCH v5 05/12] arp: Pass queue pair explicitly through ARP send path Laurent Vivier
2026-06-19 5:40 ` David Gibson
2026-06-16 12:51 ` [PATCH v5 06/12] tcp: Pass queue pair explicitly through TCP " Laurent Vivier
2026-06-19 6:00 ` David Gibson [this message]
2026-06-16 12:51 ` [PATCH v5 07/12] udp: Pass queue pair explicitly through UDP " Laurent Vivier
2026-06-19 6:08 ` David Gibson
2026-06-16 12:51 ` [PATCH v5 08/12] dhcp/dhcpv6: Pass queue pair explicitly through DHCP " Laurent Vivier
2026-06-19 6:10 ` David Gibson
2026-06-16 12:51 ` [PATCH v5 09/12] icmp: Pass queue pair explicitly through ICMP " Laurent Vivier
2026-06-19 6:12 ` David Gibson
2026-06-16 12:51 ` [PATCH v5 10/12] ndp: Pass queue pair explicitly through NDP " Laurent Vivier
2026-06-19 6:17 ` David Gibson
2026-06-16 12:51 ` [PATCH v5 11/12] flow: Add queue pair tracking to flow management Laurent Vivier
2026-06-19 6:36 ` David Gibson
2026-06-16 12:51 ` [PATCH v5 12/12] flow: Derive epoll fd from queue pair, removing epollid field Laurent Vivier
2026-06-19 6:52 ` David Gibson
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=ajTbChyykMCYiOHm@zatzit \
--to=david@gibson.dropbear.id.au \
--cc=lvivier@redhat.com \
--cc=passt-dev@passt.top \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
Code repositories for project(s) associated with this public inbox
https://passt.top/passt
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for IMAP folder(s).