* [PATCH v4 4/6] vhost-user: Add queue pair parameter throughout the network stack
2025-12-15 12:40 [PATCH v4 0/6] vhost-user: Add multiqueue support Laurent Vivier
` (2 preceding siblings ...)
2025-12-15 12:40 ` [PATCH v4 3/6] test: Add multiqueue support to vhost-user test infrastructure Laurent Vivier
@ 2025-12-15 12:40 ` Laurent Vivier
2025-12-15 12:40 ` [PATCH v4 5/6] tap: Convert packet pools to per-queue-pair arrays for multiqueue Laurent Vivier
2025-12-15 12:40 ` [PATCH v4 6/6] flow: Add queue pair tracking to flow management Laurent Vivier
5 siblings, 0 replies; 7+ messages in thread
From: Laurent Vivier @ 2025-12-15 12:40 UTC (permalink / raw)
To: passt-dev; +Cc: Laurent Vivier
Add a queue pair parameter to vu_send_single() and propagate this parameter
through the entire network stack call chain. The queue pair parameter specifies
which queue pair to use for sending packets in vhost-user mode.
New macros in passt.h help manage queue pairs and queue indices:
- QPAIR_DEFAULT: Default queue pair (0)
- QPAIR_FROMGUEST_QUEUE(qpair): Convert queue pair to TX queue index
- QPAIR_TOGUEST_QUEUE(qpair): Convert queue pair to RX queue index
- QPAIR_FROM_QUEUE(queue): Extract queue pair number from queue index
Each queue pair consists of one RX queue (even index, to the guest) and one
TX queue (odd index, from the guest). For example, pair 0 uses RX queue 0
and TX queue 1; pair 1 uses RX queue 2 and TX queue 3.
All callers currently pass QPAIR_DEFAULT to preserve existing single-queue
behavior. This is a preparatory step for enabling multi-queue and
per-queue worker threads in vhost-user mode.
No functional change.
Signed-off-by: Laurent Vivier <lvivier@redhat.com>
---
arp.c | 15 +++++----
arp.h | 6 ++--
dhcp.c | 5 +--
dhcp.h | 2 +-
dhcpv6.c | 12 ++++---
dhcpv6.h | 2 +-
fwd.c | 18 ++++++-----
fwd.h | 5 +--
icmp.c | 6 ++--
ndp.c | 35 ++++++++++++--------
ndp.h | 7 ++--
netlink.c | 2 +-
passt.c | 2 +-
passt.h | 8 +++++
tap.c | 86 +++++++++++++++++++++++++++++---------------------
tap.h | 19 +++++------
tcp.c | 16 ++++++----
tcp.h | 7 ++--
udp.c | 22 +++++++------
udp.h | 4 +--
udp_internal.h | 4 +--
udp_vu.c | 3 +-
vhost_user.c | 4 ++-
vhost_user.h | 6 ----
vu_common.c | 22 ++++++++-----
vu_common.h | 3 +-
26 files changed, 187 insertions(+), 134 deletions(-)
diff --git a/arp.c b/arp.c
index bb042e9585a3..1dc8b87cd993 100644
--- a/arp.c
+++ b/arp.c
@@ -63,11 +63,12 @@ static bool ignore_arp(const struct ctx *c,
/**
* arp() - Check if this is a supported ARP message, reply as needed
* @c: Execution context
+ * @qpair: Queue pair on which to send the reply
* @data: Single packet with Ethernet buffer
*
* Return: 1 if handled, -1 on failure
*/
-int arp(const struct ctx *c, struct iov_tail *data)
+int arp(const struct ctx *c, unsigned int qpair, struct iov_tail *data)
{
union inany_addr tgt;
struct {
@@ -112,7 +113,7 @@ int arp(const struct ctx *c, struct iov_tail *data)
memcpy(resp.am.tha, am->sha, sizeof(resp.am.tha));
memcpy(resp.am.tip, am->sip, sizeof(resp.am.tip));
- tap_send_single(c, &resp, sizeof(resp));
+ tap_send_single(c, qpair, &resp, sizeof(resp));
return 1;
}
@@ -120,8 +121,9 @@ int arp(const struct ctx *c, struct iov_tail *data)
/**
* arp_send_init_req() - Send initial ARP request to retrieve guest MAC address
* @c: Execution context
+ * @qpair: Queue pair on which to send the request
*/
-void arp_send_init_req(const struct ctx *c)
+void arp_send_init_req(const struct ctx *c, unsigned int qpair)
{
struct {
struct ethhdr eh;
@@ -148,16 +150,17 @@ void arp_send_init_req(const struct ctx *c)
memcpy(req.am.tip, &c->ip4.addr, sizeof(req.am.tip));
debug("Sending initial ARP request for guest MAC address");
- tap_send_single(c, &req, sizeof(req));
+ tap_send_single(c, qpair, &req, sizeof(req));
}
/**
* arp_announce() - Send an ARP announcement for an IPv4 host
* @c: Execution context
+ * @qpair: Queue pair on which to send the announcement
* @ip: IPv4 address we announce as owned by @mac
* @mac: MAC address to advertise for @ip
*/
-void arp_announce(const struct ctx *c, struct in_addr *ip,
+void arp_announce(const struct ctx *c, unsigned int qpair, struct in_addr *ip,
const unsigned char *mac)
{
char ip_str[INET_ADDRSTRLEN];
@@ -202,5 +205,5 @@ void arp_announce(const struct ctx *c, struct in_addr *ip,
eth_ntop(mac, mac_str, sizeof(mac_str));
debug("ARP announcement for %s / %s", ip_str, mac_str);
- tap_send_single(c, &msg, sizeof(msg));
+ tap_send_single(c, qpair, &msg, sizeof(msg));
}
diff --git a/arp.h b/arp.h
index 4862e90a14ee..0f7a722a8ea8 100644
--- a/arp.h
+++ b/arp.h
@@ -20,9 +20,9 @@ struct arpmsg {
unsigned char tip[4];
} __attribute__((__packed__));
-int arp(const struct ctx *c, struct iov_tail *data);
-void arp_send_init_req(const struct ctx *c);
-void arp_announce(const struct ctx *c, struct in_addr *ip,
+int arp(const struct ctx *c, unsigned int qpair, struct iov_tail *data);
+void arp_send_init_req(const struct ctx *c, unsigned int qpair);
+void arp_announce(const struct ctx *c, unsigned int qpair, struct in_addr *ip,
const unsigned char *mac);
#endif /* ARP_H */
diff --git a/dhcp.c b/dhcp.c
index 6b9c2e3b9e5a..e3f5673cc5d8 100644
--- a/dhcp.c
+++ b/dhcp.c
@@ -296,11 +296,12 @@ static void opt_set_dns_search(const struct ctx *c, size_t max_len)
/**
* dhcp() - Check if this is a DHCP message, reply as needed
* @c: Execution context
+ * @qpair: Queue pair on which to send the reply
* @data: Single packet with Ethernet buffer
*
* Return: 0 if it's not a DHCP message, 1 if handled, -1 on failure
*/
-int dhcp(const struct ctx *c, struct iov_tail *data)
+int dhcp(const struct ctx *c, unsigned int qpair, struct iov_tail *data)
{
char macstr[ETH_ADDRSTRLEN];
size_t mlen, dlen, opt_len;
@@ -471,7 +472,7 @@ int dhcp(const struct ctx *c, struct iov_tail *data)
else
dst = c->ip4.addr;
- tap_udp4_send(c, c->ip4.our_tap_addr, 67, dst, 68, &reply, dlen);
+ tap_udp4_send(c, qpair, c->ip4.our_tap_addr, 67, dst, 68, &reply, dlen);
return 1;
}
diff --git a/dhcp.h b/dhcp.h
index cd50c99b8856..6d034f0c58af 100644
--- a/dhcp.h
+++ b/dhcp.h
@@ -6,7 +6,7 @@
#ifndef DHCP_H
#define DHCP_H
-int dhcp(const struct ctx *c, struct iov_tail *data);
+int dhcp(const struct ctx *c, unsigned int qpair, struct iov_tail *data);
void dhcp_init(void);
#endif /* DHCP_H */
diff --git a/dhcpv6.c b/dhcpv6.c
index e4df0db562e6..5fffac5d95e5 100644
--- a/dhcpv6.c
+++ b/dhcpv6.c
@@ -369,12 +369,13 @@ notonlink:
/**
* dhcpv6_send_ia_notonlink() - Send NotOnLink status
* @c: Execution context
+ * @qpair: Queue pair on which to send the reply
* @ia_base: Non-appropriate IA_NA or IA_TA base
* @client_id_base: Client ID message option base
* @len: Client ID length
* @xid: Transaction ID for message exchange
*/
-static void dhcpv6_send_ia_notonlink(struct ctx *c,
+static void dhcpv6_send_ia_notonlink(struct ctx *c, unsigned int qpair,
const struct iov_tail *ia_base,
const struct iov_tail *client_id_base,
int len, uint32_t xid)
@@ -404,7 +405,7 @@ static void dhcpv6_send_ia_notonlink(struct ctx *c,
resp_not_on_link.hdr.xid = xid;
- tap_udp6_send(c, src, 547, tap_ip6_daddr(c, src), 546,
+ tap_udp6_send(c, qpair, src, 547, tap_ip6_daddr(c, src), 546,
xid, &resp_not_on_link, n);
}
@@ -539,13 +540,14 @@ static size_t dhcpv6_client_fqdn_fill(const struct iov_tail *data,
/**
* dhcpv6() - Check if this is a DHCPv6 message, reply as needed
* @c: Execution context
+ * @qpair: Queue pair on which to send the reply
* @data: Single packet starting from UDP header
* @saddr: Source IPv6 address of original message
* @daddr: Destination IPv6 address of original message
*
* Return: 0 if it's not a DHCPv6 message, 1 if handled, -1 on failure
*/
-int dhcpv6(struct ctx *c, struct iov_tail *data,
+int dhcpv6(struct ctx *c, unsigned int qpair, struct iov_tail *data,
const struct in6_addr *saddr, const struct in6_addr *daddr)
{
const struct opt_server_id *server_id = NULL;
@@ -627,7 +629,7 @@ int dhcpv6(struct ctx *c, struct iov_tail *data,
if (dhcpv6_ia_notonlink(data, &c->ip6.addr)) {
- dhcpv6_send_ia_notonlink(c, data, &client_id_base,
+ dhcpv6_send_ia_notonlink(c, qpair, data, &client_id_base,
ntohs(client_id->l), mh->xid);
return 1;
@@ -677,7 +679,7 @@ int dhcpv6(struct ctx *c, struct iov_tail *data,
resp.hdr.xid = mh->xid;
- tap_udp6_send(c, src, 547, tap_ip6_daddr(c, src), 546,
+ tap_udp6_send(c, qpair, src, 547, tap_ip6_daddr(c, src), 546,
mh->xid, &resp, n);
c->ip6.addr_seen = c->ip6.addr;
diff --git a/dhcpv6.h b/dhcpv6.h
index c706dfdbb2ac..3a249b39e6c7 100644
--- a/dhcpv6.h
+++ b/dhcpv6.h
@@ -6,7 +6,7 @@
#ifndef DHCPV6_H
#define DHCPV6_H
-int dhcpv6(struct ctx *c, struct iov_tail *data,
+int dhcpv6(struct ctx *c, unsigned int qpair, struct iov_tail *data,
struct in6_addr *saddr, struct in6_addr *daddr);
void dhcpv6_init(const struct ctx *c);
diff --git a/fwd.c b/fwd.c
index 44a0e1096971..e6c132ce2f91 100644
--- a/fwd.c
+++ b/fwd.c
@@ -110,12 +110,14 @@ static struct neigh_table_entry *fwd_neigh_table_find(const struct ctx *c,
/**
* fwd_neigh_table_update() - Allocate or update neighbour table entry
* @c: Execution context
+ * @qpair: Queue pair to use for sending announcements
* @addr: IP address used to determine insertion slot and store in entry
* @mac: The MAC address associated with the neighbour address
* @permanent: Created entry cannot be altered or freed
*/
-void fwd_neigh_table_update(const struct ctx *c, const union inany_addr *addr,
- const uint8_t *mac, bool permanent)
+void fwd_neigh_table_update(const struct ctx *c, unsigned int qpair,
+ const union inany_addr *addr, const uint8_t *mac,
+ bool permanent)
{
struct neigh_table *t = &neigh_table;
struct neigh_table_entry *e;
@@ -147,9 +149,9 @@ void fwd_neigh_table_update(const struct ctx *c, const union inany_addr *addr,
return;
if (inany_v4(addr))
- arp_announce(c, inany_v4(addr), e->mac);
+ arp_announce(c, qpair, inany_v4(addr), e->mac);
else
- ndp_unsolicited_na(c, &addr->a6);
+ ndp_unsolicited_na(c, qpair, &addr->a6);
}
/**
@@ -230,19 +232,19 @@ void fwd_neigh_table_init(const struct ctx *c)
/* Blocker entries to stop events from hosts using these addresses */
if (!inany_is_unspecified4(&mhl))
- fwd_neigh_table_update(c, &mhl, c->our_tap_mac, true);
+ fwd_neigh_table_update(c, QPAIR_DEFAULT, &mhl, c->our_tap_mac, true);
if (!inany_is_unspecified4(&mga))
- fwd_neigh_table_update(c, &mga, c->our_tap_mac, true);
+ fwd_neigh_table_update(c, QPAIR_DEFAULT, &mga, c->our_tap_mac, true);
mhl = *(union inany_addr *)&c->ip6.map_host_loopback;
mga = *(union inany_addr *)&c->ip6.map_guest_addr;
if (!inany_is_unspecified6(&mhl))
- fwd_neigh_table_update(c, &mhl, c->our_tap_mac, true);
+ fwd_neigh_table_update(c, QPAIR_DEFAULT, &mhl, c->our_tap_mac, true);
if (!inany_is_unspecified6(&mga))
- fwd_neigh_table_update(c, &mga, c->our_tap_mac, true);
+ fwd_neigh_table_update(c, QPAIR_DEFAULT, &mga, c->our_tap_mac, true);
}
/** fwd_probe_ephemeral() - Determine what ports this host considers ephemeral
diff --git a/fwd.h b/fwd.h
index 779258221a9a..839737028ace 100644
--- a/fwd.h
+++ b/fwd.h
@@ -55,8 +55,9 @@ uint8_t fwd_nat_from_splice(const struct ctx *c, uint8_t proto,
const struct flowside *ini, struct flowside *tgt);
uint8_t fwd_nat_from_host(const struct ctx *c, uint8_t proto,
const struct flowside *ini, struct flowside *tgt);
-void fwd_neigh_table_update(const struct ctx *c, const union inany_addr *addr,
- const uint8_t *mac, bool permanent);
+void fwd_neigh_table_update(const struct ctx *c, unsigned int qpair,
+ const union inany_addr *addr, const uint8_t *mac,
+ bool permanent);
void fwd_neigh_table_free(const struct ctx *c,
const union inany_addr *addr);
void fwd_neigh_mac_get(const struct ctx *c, const union inany_addr *addr,
diff --git a/icmp.c b/icmp.c
index 9564c4963f7b..fbdd0bbdf546 100644
--- a/icmp.c
+++ b/icmp.c
@@ -132,12 +132,14 @@ void icmp_sock_handler(const struct ctx *c, union epoll_ref ref)
const struct in_addr *daddr = inany_v4(&ini->eaddr);
ASSERT(saddr && daddr); /* Must have IPv4 addresses */
- tap_icmp4_send(c, *saddr, *daddr, buf, pingf->f.tap_omac, n);
+ tap_icmp4_send(c, QPAIR_DEFAULT, *saddr, *daddr, buf,
+ pingf->f.tap_omac, n);
} else if (pingf->f.type == FLOW_PING6) {
const struct in6_addr *saddr = &ini->oaddr.a6;
const struct in6_addr *daddr = &ini->eaddr.a6;
- tap_icmp6_send(c, saddr, daddr, buf, pingf->f.tap_omac, n);
+ tap_icmp6_send(c, QPAIR_DEFAULT, saddr, daddr, buf,
+ pingf->f.tap_omac, n);
}
return;
diff --git a/ndp.c b/ndp.c
index eb9e31399555..07700aea713e 100644
--- a/ndp.c
+++ b/ndp.c
@@ -175,25 +175,27 @@ struct ndp_ns {
/**
* ndp_send() - Send an NDP message
* @c: Execution context
+ * @qpair: Queue pair on which to send the message
* @dst: IPv6 address to send the message to
* @buf: ICMPv6 header + message payload
* @l4len: Length of message, including ICMPv6 header
*/
-static void ndp_send(const struct ctx *c, const struct in6_addr *dst,
+static void ndp_send(const struct ctx *c, unsigned int qpair, const struct in6_addr *dst,
const void *buf, size_t l4len)
{
const struct in6_addr *src = &c->ip6.our_tap_ll;
- tap_icmp6_send(c, src, dst, buf, c->our_tap_mac, l4len);
+ tap_icmp6_send(c, qpair, src, dst, buf, c->our_tap_mac, l4len);
}
/**
* ndp_na() - Send an NDP Neighbour Advertisement (NA) message
* @c: Execution context
+ * @qpair: Queue pair on which to send the NA
* @dst: IPv6 address to send the NA to
* @addr: IPv6 address to advertise
*/
-static void ndp_na(const struct ctx *c, const struct in6_addr *dst,
+static void ndp_na(const struct ctx *c, unsigned int qpair, const struct in6_addr *dst,
const struct in6_addr *addr)
{
union inany_addr tgt;
@@ -217,26 +219,29 @@ static void ndp_na(const struct ctx *c, const struct in6_addr *dst,
inany_from_af(&tgt, AF_INET6, addr);
fwd_neigh_mac_get(c, &tgt, na.target_l2_addr.mac);
- ndp_send(c, dst, &na, sizeof(na));
+ ndp_send(c, qpair, dst, &na, sizeof(na));
}
/**
* ndp_unsolicited_na() - Send unsolicited NA
* @c: Execution context
+ * @qpair: Queue pair on which to send the NA
* @addr: IPv6 address to advertise
*/
-void ndp_unsolicited_na(const struct ctx *c, const struct in6_addr *addr)
+void ndp_unsolicited_na(const struct ctx *c, unsigned int qpair,
+ const struct in6_addr *addr)
{
if (tap_is_ready(c))
- ndp_na(c, &in6addr_ll_all_nodes, addr);
+ ndp_na(c, qpair, &in6addr_ll_all_nodes, addr);
}
/**
* ndp_ra() - Send an NDP Router Advertisement (RA) message
* @c: Execution context
+ * @qpair: Queue pair on which to send the RA
* @dst: IPv6 address to send the RA to
*/
-static void ndp_ra(const struct ctx *c, const struct in6_addr *dst)
+static void ndp_ra(const struct ctx *c, unsigned int qpair, const struct in6_addr *dst)
{
struct ndp_ra ra = {
.ih = {
@@ -342,18 +347,19 @@ static void ndp_ra(const struct ctx *c, const struct in6_addr *dst)
memcpy(&ra.source_ll.mac, c->our_tap_mac, ETH_ALEN);
/* NOLINTNEXTLINE(clang-analyzer-security.PointerSub) */
- ndp_send(c, dst, &ra, ptr - (unsigned char *)&ra);
+ ndp_send(c, qpair, dst, &ra, ptr - (unsigned char *)&ra);
}
/**
* ndp() - Check for NDP solicitations, reply as needed
* @c: Execution context
+ * @qpair: Queue pair on which to send replies
* @saddr: Source IPv6 address
* @data: Single packet with ICMPv6 header
*
* Return: 0 if not handled here, 1 if handled, -1 on failure
*/
-int ndp(const struct ctx *c, const struct in6_addr *saddr,
+int ndp(const struct ctx *c, unsigned int qpair, const struct in6_addr *saddr,
struct iov_tail *data)
{
struct icmp6hdr ih_storage;
@@ -382,13 +388,13 @@ int ndp(const struct ctx *c, const struct in6_addr *saddr,
info("NDP: received NS, sending NA");
- ndp_na(c, saddr, &ns->target_addr);
+ ndp_na(c, qpair, saddr, &ns->target_addr);
} else if (ih->icmp6_type == RS) {
if (c->no_ra)
return 1;
info("NDP: received RS, sending RA");
- ndp_ra(c, saddr);
+ ndp_ra(c, qpair, saddr);
}
return 1;
@@ -446,7 +452,7 @@ void ndp_timer(const struct ctx *c, const struct timespec *now)
info("NDP: sending unsolicited RA, next in %llds", (long long)interval);
- ndp_ra(c, &in6addr_ll_all_nodes);
+ ndp_ra(c, QPAIR_DEFAULT, &in6addr_ll_all_nodes);
first:
next_ra = now->tv_sec + interval;
@@ -455,8 +461,9 @@ first:
/**
* ndp_send_init_req() - Send initial NDP NS to retrieve guest MAC address
* @c: Execution context
+ * @qpair: Queue pair on which to send the request
*/
-void ndp_send_init_req(const struct ctx *c)
+void ndp_send_init_req(const struct ctx *c, unsigned int qpair)
{
struct ndp_ns ns = {
.ih = {
@@ -469,5 +476,5 @@ void ndp_send_init_req(const struct ctx *c)
.target_addr = c->ip6.addr
};
debug("Sending initial NDP NS request for guest MAC address");
- ndp_send(c, &c->ip6.addr, &ns, sizeof(ns));
+ ndp_send(c, qpair, &c->ip6.addr, &ns, sizeof(ns));
}
diff --git a/ndp.h b/ndp.h
index 56b756d8400b..8c168fc199fe 100644
--- a/ndp.h
+++ b/ndp.h
@@ -8,10 +8,11 @@
struct icmp6hdr;
-int ndp(const struct ctx *c, const struct in6_addr *saddr,
+int ndp(const struct ctx *c, unsigned int qpair, const struct in6_addr *saddr,
struct iov_tail *data);
void ndp_timer(const struct ctx *c, const struct timespec *now);
-void ndp_send_init_req(const struct ctx *c);
-void ndp_unsolicited_na(const struct ctx *c, const struct in6_addr *addr);
+void ndp_send_init_req(const struct ctx *c, unsigned int qpair);
+void ndp_unsolicited_na(const struct ctx *c, unsigned int qpair,
+ const struct in6_addr *addr);
#endif /* NDP_H */
diff --git a/netlink.c b/netlink.c
index 82a2f0c9aef7..d687c022d083 100644
--- a/netlink.c
+++ b/netlink.c
@@ -1201,7 +1201,7 @@ static void nl_neigh_msg_read(const struct ctx *c, struct nlmsghdr *nh)
eth_ntop(lladdr, mac_str, sizeof(mac_str));
trace("neighbour notifier update: %s / %s", ip_str, mac_str);
- fwd_neigh_table_update(c, &daddr, lladdr, false);
+ fwd_neigh_table_update(c, QPAIR_DEFAULT, &daddr, lladdr, false);
}
/**
diff --git a/passt.c b/passt.c
index 5ed88d0708e3..2dcfbdcd27f4 100644
--- a/passt.c
+++ b/passt.c
@@ -260,7 +260,7 @@ static void passt_worker(void *opaque, int nfds, struct epoll_event *events)
tap_handler_passt(c, eventmask, &now);
break;
case EPOLL_TYPE_TAP_LISTEN:
- tap_listen_handler(c, eventmask);
+ tap_listen_handler(c, QPAIR_DEFAULT, eventmask);
break;
case EPOLL_TYPE_NSQUIT_INOTIFY:
pasta_netns_quit_inotify_handler(c, ref.fd);
diff --git a/passt.h b/passt.h
index 79d01ddb7f1f..1633fbc24c24 100644
--- a/passt.h
+++ b/passt.h
@@ -28,6 +28,14 @@ union epoll_ref;
#include "udp.h"
#include "vhost_user.h"
+/* Each queue pair has one RX (to-guest) queue at an even index and one TX
+ * (from-guest) queue at the next odd index: pair 0 is 0/1, pair 1 is 2/3.
+ */
+#define QPAIR_DEFAULT 0 /* Default queue pair */
+#define QPAIR_FROMGUEST_QUEUE(qpair) ((size_t)(qpair) * 2 + 1) /* TX queue index from pair */
+#define QPAIR_TOGUEST_QUEUE(qpair) ((size_t)(qpair) * 2) /* RX queue index from pair */
+#define QPAIR_FROM_QUEUE(queue) ((queue) / 2) /* Extract pair from queue */
+
/* Default address for our end on the tap interface. Bit 0 of byte 0 must be 0
* (unicast) and bit 1 of byte 1 must be 1 (locally administered). Otherwise
* it's arbitrary.
diff --git a/tap.c b/tap.c
index f80183d95a4e..1bbf6d2c897e 100644
--- a/tap.c
+++ b/tap.c
@@ -125,10 +125,12 @@ unsigned long tap_l2_max_len(const struct ctx *c)
/**
* tap_send_single() - Send a single frame
* @c: Execution context
+ * @qpair: Queue pair on which to send the frame
* @data: Packet buffer
* @l2len: Total L2 packet length
*/
-void tap_send_single(const struct ctx *c, const void *data, size_t l2len)
+void tap_send_single(const struct ctx *c, unsigned int qpair, const void *data,
+ size_t l2len)
{
uint8_t padded[ETH_ZLEN] = { 0 };
struct iovec iov[2];
@@ -156,7 +158,7 @@ void tap_send_single(const struct ctx *c, const void *data, size_t l2len)
tap_send_frames(c, iov, iovcnt, 1);
break;
case MODE_VU:
- vu_send_single(c, data, l2len);
+ vu_send_single(c, qpair, data, l2len);
break;
}
}
@@ -259,6 +261,7 @@ void *tap_push_uh4(struct udphdr *uh, struct in_addr src, in_port_t sport,
/**
* tap_udp4_send() - Send UDP over IPv4 packet
* @c: Execution context
+ * @qpair: Queue pair on which to send packet
* @src: IPv4 source address
* @sport: UDP source port
* @dst: IPv4 destination address
@@ -266,7 +269,7 @@ void *tap_push_uh4(struct udphdr *uh, struct in_addr src, in_port_t sport,
* @in: UDP payload contents (not including UDP header)
* @dlen: UDP payload length (not including UDP header)
*/
-void tap_udp4_send(const struct ctx *c, struct in_addr src, in_port_t sport,
+void tap_udp4_send(const struct ctx *c, unsigned int qpair, struct in_addr src, in_port_t sport,
struct in_addr dst, in_port_t dport,
const void *in, size_t dlen)
{
@@ -277,20 +280,22 @@ void tap_udp4_send(const struct ctx *c, struct in_addr src, in_port_t sport,
char *data = tap_push_uh4(uh, src, sport, dst, dport, in, dlen);
memcpy(data, in, dlen);
- tap_send_single(c, buf, dlen + (data - buf));
+ tap_send_single(c, qpair, buf, dlen + (data - buf));
}
/**
* tap_icmp4_send() - Send ICMPv4 packet
* @c: Execution context
+ * @qpair: Queue pair on which to send packet
* @src: IPv4 source address
* @dst: IPv4 destination address
* @in: ICMP packet, including ICMP header
* @src_mac: MAC address to be used as source for message
* @l4len: ICMP packet length, including ICMP header
*/
-void tap_icmp4_send(const struct ctx *c, struct in_addr src, struct in_addr dst,
- const void *in, const void *src_mac, size_t l4len)
+void tap_icmp4_send(const struct ctx *c, unsigned int qpair, struct in_addr src,
+ struct in_addr dst, const void *in, const void *src_mac,
+ size_t l4len)
{
char buf[USHRT_MAX];
struct iphdr *ip4h = tap_push_l2h(c, buf, src_mac, ETH_P_IP);
@@ -300,7 +305,7 @@ void tap_icmp4_send(const struct ctx *c, struct in_addr src, struct in_addr dst,
memcpy(icmp4h, in, l4len);
csum_icmp4(icmp4h, icmp4h + 1, l4len - sizeof(*icmp4h));
- tap_send_single(c, buf, l4len + ((char *)icmp4h - buf));
+ tap_send_single(c, qpair, buf, l4len + ((char *)icmp4h - buf));
}
/**
@@ -364,6 +369,7 @@ void *tap_push_uh6(struct udphdr *uh,
/**
* tap_udp6_send() - Send UDP over IPv6 packet
* @c: Execution context
+ * @qpair: Queue pair on which to send packet
* @src: IPv6 source address
* @sport: UDP source port
* @dst: IPv6 destination address
@@ -372,7 +378,7 @@ void *tap_push_uh6(struct udphdr *uh,
* @in: UDP payload contents (not including UDP header)
* @dlen: UDP payload length (not including UDP header)
*/
-void tap_udp6_send(const struct ctx *c,
+void tap_udp6_send(const struct ctx *c, unsigned int qpair,
const struct in6_addr *src, in_port_t sport,
const struct in6_addr *dst, in_port_t dport,
uint32_t flow, void *in, size_t dlen)
@@ -385,19 +391,20 @@ void tap_udp6_send(const struct ctx *c,
char *data = tap_push_uh6(uh, src, sport, dst, dport, in, dlen);
memcpy(data, in, dlen);
- tap_send_single(c, buf, dlen + (data - buf));
+ tap_send_single(c, qpair, buf, dlen + (data - buf));
}
/**
* tap_icmp6_send() - Send ICMPv6 packet
* @c: Execution context
+ * @qpair: Queue pair on which to send packet
* @src: IPv6 source address
* @dst: IPv6 destination address
* @in: ICMP packet, including ICMP header
* @src_mac: MAC address to be used as source for message
* @l4len: ICMP packet length, including ICMP header
*/
-void tap_icmp6_send(const struct ctx *c,
+void tap_icmp6_send(const struct ctx *c, unsigned int qpair,
const struct in6_addr *src, const struct in6_addr *dst,
const void *in, const void *src_mac, size_t l4len)
{
@@ -409,7 +416,7 @@ void tap_icmp6_send(const struct ctx *c,
memcpy(icmp6h, in, l4len);
csum_icmp6(icmp6h, src, dst, icmp6h + 1, l4len - sizeof(*icmp6h));
- tap_send_single(c, buf, l4len + ((char *)icmp6h - buf));
+ tap_send_single(c, qpair, buf, l4len + ((char *)icmp6h - buf));
}
/**
@@ -705,11 +712,13 @@ static bool tap4_is_fragment(const struct iphdr *iph,
/**
* tap4_handler() - IPv4 and ARP packet handler for tap file descriptor
* @c: Execution context
+ * @qpair: Queue pair on which to send packets
* @now: Current timestamp
*
* Return: count of packets consumed by handlers
*/
-static int tap4_handler(struct ctx *c, const struct timespec *now)
+static int tap4_handler(struct ctx *c, unsigned int qpair,
+ const struct timespec *now)
{
unsigned int i, j, seq_count;
struct tap4_l4_t *seq;
@@ -736,7 +745,7 @@ resume:
if (!eh)
continue;
if (ntohs(eh->h_proto) == ETH_P_ARP) {
- arp(c, &data);
+ arp(c, qpair, &data);
continue;
}
@@ -797,7 +806,7 @@ resume:
struct iov_tail eh_data;
packet_get(pool_tap4, i, &eh_data);
- if (dhcp(c, &eh_data))
+ if (dhcp(c, qpair, &eh_data))
continue;
}
@@ -860,7 +869,7 @@ append:
if (c->no_tcp)
continue;
for (k = 0; k < p->count; )
- k += tcp_tap_handler(c, PIF_TAP, AF_INET,
+ k += tcp_tap_handler(c, qpair, PIF_TAP, AF_INET,
&seq->saddr, &seq->daddr,
0, p, k, now);
} else if (seq->protocol == IPPROTO_UDP) {
@@ -882,11 +891,12 @@ append:
/**
* tap6_handler() - IPv6 packet handler for tap file descriptor
* @c: Execution context
+ * @qpair: Queue pair on which to send packets
* @now: Current timestamp
*
* Return: count of packets consumed by handlers
*/
-static int tap6_handler(struct ctx *c, const struct timespec *now)
+static int tap6_handler(struct ctx *c, unsigned int qpair, const struct timespec *now)
{
unsigned int i, j, seq_count = 0;
struct tap6_l4_t *seq;
@@ -963,7 +973,7 @@ resume:
continue;
ndp_data = data;
- if (ndp(c, saddr, &ndp_data))
+ if (ndp(c, qpair, saddr, &ndp_data))
continue;
tap_packet_debug(NULL, ip6h, NULL, proto, NULL, 1);
@@ -982,7 +992,7 @@ resume:
if (proto == IPPROTO_UDP) {
struct iov_tail uh_data = data;
- if (dhcpv6(c, &uh_data, saddr, daddr))
+ if (dhcpv6(c, qpair, &uh_data, saddr, daddr))
continue;
}
@@ -1050,7 +1060,7 @@ append:
if (c->no_tcp)
continue;
for (k = 0; k < p->count; )
- k += tcp_tap_handler(c, PIF_TAP, AF_INET6,
+ k += tcp_tap_handler(c, qpair, PIF_TAP, AF_INET6,
&seq->saddr, &seq->daddr,
seq->flow_lbl, p, k, now);
} else if (seq->protocol == IPPROTO_UDP) {
@@ -1081,21 +1091,23 @@ void tap_flush_pools(void)
/**
* tap_handler() - IPv4/IPv6 and ARP packet handler for tap file descriptor
* @c: Execution context
+ * @qpair: Queue pair on which to send packets
* @now: Current timestamp
*/
-void tap_handler(struct ctx *c, const struct timespec *now)
+void tap_handler(struct ctx *c, unsigned int qpair, const struct timespec *now)
{
- tap4_handler(c, now);
- tap6_handler(c, now);
+ tap4_handler(c, qpair, now);
+ tap6_handler(c, qpair, now);
}
/**
* tap_add_packet() - Queue/capture packet, update notion of guest MAC address
* @c: Execution context
+ * @qpair: Queue pair associated with the packet
* @data: Packet to add to the pool
* @now: Current timestamp
*/
-void tap_add_packet(struct ctx *c, struct iov_tail *data,
+void tap_add_packet(struct ctx *c, unsigned int qpair, struct iov_tail *data,
const struct timespec *now)
{
struct ethhdr eh_storage;
@@ -1120,14 +1132,14 @@ void tap_add_packet(struct ctx *c, struct iov_tail *data,
case ETH_P_ARP:
case ETH_P_IP:
if (!pool_can_fit(pool_tap4, data)) {
- tap4_handler(c, now);
+ tap4_handler(c, qpair, now);
pool_flush(pool_tap4);
}
packet_add(pool_tap4, data);
break;
case ETH_P_IPV6:
if (!pool_can_fit(pool_tap6, data)) {
- tap6_handler(c, now);
+ tap6_handler(c, qpair, now);
pool_flush(pool_tap6);
}
packet_add(pool_tap6, data);
@@ -1214,7 +1226,7 @@ static void tap_passt_input(struct ctx *c, const struct timespec *now)
n -= sizeof(uint32_t);
data = IOV_TAIL_FROM_BUF(p, l2len, 0);
- tap_add_packet(c, &data, now);
+ tap_add_packet(c, QPAIR_DEFAULT, &data, now);
p += l2len;
n -= l2len;
@@ -1223,7 +1235,7 @@ static void tap_passt_input(struct ctx *c, const struct timespec *now)
partial_len = n;
partial_frame = p;
- tap_handler(c, now);
+ tap_handler(c, QPAIR_DEFAULT, now);
}
/**
@@ -1282,10 +1294,10 @@ static void tap_pasta_input(struct ctx *c, const struct timespec *now)
continue;
data = IOV_TAIL_FROM_BUF(pkt_buf + n, len, 0);
- tap_add_packet(c, &data, now);
+ tap_add_packet(c, QPAIR_DEFAULT, &data, now);
}
- tap_handler(c, now);
+ tap_handler(c, QPAIR_DEFAULT, now);
}
/**
@@ -1374,8 +1386,9 @@ bool tap_is_ready(const struct ctx *c)
/**
* tap_start_connection() - start a new connection
* @c: Execution context
+ * @qpair: Queue pair to use for the connection
*/
-static void tap_start_connection(const struct ctx *c)
+static void tap_start_connection(const struct ctx *c, unsigned int qpair)
{
union epoll_ref ref = { 0 };
@@ -1398,17 +1411,18 @@ static void tap_start_connection(const struct ctx *c)
return;
if (c->ifi4)
- arp_send_init_req(c);
+ arp_send_init_req(c, qpair);
if (c->ifi6 && !c->no_ndp)
- ndp_send_init_req(c);
+ ndp_send_init_req(c, qpair);
}
/**
* tap_listen_handler() - Handle new connection on listening socket
* @c: Execution context
+ * @qpair: Queue pair to use for the connection
* @events: epoll events
*/
-void tap_listen_handler(struct ctx *c, uint32_t events)
+void tap_listen_handler(struct ctx *c, unsigned int qpair, uint32_t events)
{
int v = INT_MAX / 2;
struct ucred ucred;
@@ -1448,7 +1462,7 @@ void tap_listen_handler(struct ctx *c, uint32_t events)
setsockopt(c->fd_tap, SOL_SOCKET, SO_SNDBUF, &v, sizeof(v)))
trace("tap: failed to set SO_SNDBUF to %i", v);
- tap_start_connection(c);
+ tap_start_connection(c, qpair);
}
/**
@@ -1498,7 +1512,7 @@ static void tap_sock_tun_init(struct ctx *c)
pasta_ns_conf(c);
- tap_start_connection(c);
+ tap_start_connection(c, QPAIR_DEFAULT);
}
/**
@@ -1535,7 +1549,7 @@ void tap_backend_init(struct ctx *c)
if (c->fd_tap != -1) { /* Passed as --fd */
ASSERT(c->one_off);
- tap_start_connection(c);
+ tap_start_connection(c, QPAIR_DEFAULT);
return;
}
diff --git a/tap.h b/tap.h
index ee22a9d78c44..463bb710d344 100644
--- a/tap.h
+++ b/tap.h
@@ -87,30 +87,31 @@ void *tap_push_ip6h(struct ipv6hdr *ip6h,
const struct in6_addr *src,
const struct in6_addr *dst,
size_t l4len, uint8_t proto, uint32_t flow);
-void tap_udp4_send(const struct ctx *c, struct in_addr src, in_port_t sport,
+void tap_udp4_send(const struct ctx *c, unsigned int qpair, struct in_addr src, in_port_t sport,
struct in_addr dst, in_port_t dport,
const void *in, size_t dlen);
-void tap_icmp4_send(const struct ctx *c, struct in_addr src, struct in_addr dst,
- const void *in, const void *src_mac, size_t l4len);
+void tap_icmp4_send(const struct ctx *c, unsigned int qpair, struct in_addr src,
+ struct in_addr dst, const void *in, const void *src_mac,
+ size_t l4len);
const struct in6_addr *tap_ip6_daddr(const struct ctx *c,
const struct in6_addr *src);
void *tap_push_ip6h(struct ipv6hdr *ip6h,
const struct in6_addr *src, const struct in6_addr *dst,
size_t l4len, uint8_t proto, uint32_t flow);
-void tap_udp6_send(const struct ctx *c,
+void tap_udp6_send(const struct ctx *c, unsigned int qpair,
const struct in6_addr *src, in_port_t sport,
const struct in6_addr *dst, in_port_t dport,
uint32_t flow, void *in, size_t dlen);
-void tap_icmp6_send(const struct ctx *c,
+void tap_icmp6_send(const struct ctx *c, unsigned int qpair,
const struct in6_addr *src, const struct in6_addr *dst,
const void *in, const void *src_mac, size_t l4len);
-void tap_send_single(const struct ctx *c, const void *data, size_t l2len);
+void tap_send_single(const struct ctx *c, unsigned int qpair, const void *data, size_t l2len);
size_t tap_send_frames(const struct ctx *c, const struct iovec *iov,
size_t bufs_per_frame, size_t nframes);
void eth_update_mac(struct ethhdr *eh,
const unsigned char *eth_d, const unsigned char *eth_s);
bool tap_is_ready(const struct ctx *c);
-void tap_listen_handler(struct ctx *c, uint32_t events);
+void tap_listen_handler(struct ctx *c, unsigned int qpair, uint32_t events);
void tap_handler_pasta(struct ctx *c, uint32_t events,
const struct timespec *now);
void tap_handler_passt(struct ctx *c, uint32_t events,
@@ -119,7 +120,7 @@ int tap_sock_unix_open(char *sock_path);
void tap_sock_reset(struct ctx *c);
void tap_backend_init(struct ctx *c);
void tap_flush_pools(void);
-void tap_handler(struct ctx *c, const struct timespec *now);
-void tap_add_packet(struct ctx *c, struct iov_tail *data,
+void tap_handler(struct ctx *c, unsigned int qpair, const struct timespec *now);
+void tap_add_packet(struct ctx *c, unsigned int qpair, struct iov_tail *data,
const struct timespec *now);
#endif /* TAP_H */
diff --git a/tcp.c b/tcp.c
index ce36bbd3943b..9b827d4a9366 100644
--- a/tcp.c
+++ b/tcp.c
@@ -2104,6 +2104,7 @@ static void tcp_conn_from_sock_finish(const struct ctx *c,
/**
* tcp_rst_no_conn() - Send RST in response to a packet with no connection
* @c: Execution context
+ * @qpair: Queue pair on which to send the reply
* @af: Address family, AF_INET or AF_INET6
* @saddr: Source address of the packet we're responding to
* @daddr: Destination address of the packet we're responding to
@@ -2111,7 +2112,7 @@ static void tcp_conn_from_sock_finish(const struct ctx *c,
* @th: TCP header of the packet we're responding to
* @l4len: Packet length, including TCP header
*/
-static void tcp_rst_no_conn(const struct ctx *c, int af,
+static void tcp_rst_no_conn(const struct ctx *c, unsigned int qpair, int af,
const void *saddr, const void *daddr,
uint32_t flow_lbl,
const struct tcphdr *th, size_t l4len)
@@ -2169,12 +2170,13 @@ static void tcp_rst_no_conn(const struct ctx *c, int af,
tcp_update_csum(psum, rsth, &payload);
rst_l2len = ((char *)rsth - buf) + sizeof(*rsth);
- tap_send_single(c, buf, rst_l2len);
+ tap_send_single(c, qpair, buf, rst_l2len);
}
/**
* tcp_tap_handler() - Handle packets from tap and state transitions
* @c: Execution context
+ * @qpair: Queue pair on which to send packets
* @pif: pif on which the packet is arriving
* @af: Address family, AF_INET or AF_INET6
* @saddr: Source address
@@ -2186,9 +2188,10 @@ static void tcp_rst_no_conn(const struct ctx *c, int af,
*
* Return: count of consumed packets
*/
-int tcp_tap_handler(const struct ctx *c, uint8_t pif, sa_family_t af,
- const void *saddr, const void *daddr, uint32_t flow_lbl,
- const struct pool *p, int idx, const struct timespec *now)
+int tcp_tap_handler(const struct ctx *c, unsigned int qpair, uint8_t pif,
+ sa_family_t af, const void *saddr, const void *daddr,
+ uint32_t flow_lbl, const struct pool *p, int idx,
+ const struct timespec *now)
{
struct tcp_tap_conn *conn;
struct tcphdr th_storage;
@@ -2228,7 +2231,8 @@ int tcp_tap_handler(const struct ctx *c, uint8_t pif, sa_family_t af,
tcp_conn_from_tap(c, af, saddr, daddr, th,
opts, optlen, now);
else
- tcp_rst_no_conn(c, af, saddr, daddr, flow_lbl, th, l4len);
+ tcp_rst_no_conn(c, qpair, af, saddr, daddr, flow_lbl, th,
+ l4len);
return 1;
}
diff --git a/tcp.h b/tcp.h
index 3f21e7551684..4ce70babd15a 100644
--- a/tcp.h
+++ b/tcp.h
@@ -15,9 +15,10 @@ void tcp_listen_handler(const struct ctx *c, union epoll_ref ref,
const struct timespec *now);
void tcp_sock_handler(const struct ctx *c, union epoll_ref ref,
uint32_t events);
-int tcp_tap_handler(const struct ctx *c, uint8_t pif, sa_family_t af,
- const void *saddr, const void *daddr, uint32_t flow_lbl,
- const struct pool *p, int idx, const struct timespec *now);
+int tcp_tap_handler(const struct ctx *c, unsigned int qpair, uint8_t pif,
+ sa_family_t af, const void *saddr, const void *daddr,
+ uint32_t flow_lbl, const struct pool *p, int idx,
+ const struct timespec *now);
int tcp_sock_init(const struct ctx *c, uint8_t pif,
const union inany_addr *addr, const char *ifname,
in_port_t port);
diff --git a/udp.c b/udp.c
index 08bec50a27af..fd584a835076 100644
--- a/udp.c
+++ b/udp.c
@@ -411,13 +411,14 @@ static void udp_tap_prepare(const struct mmsghdr *mmh,
/**
* udp_send_tap_icmp4() - Construct and send ICMPv4 to local peer
* @c: Execution context
+ * @qpair: Queue pair on which to send the ICMPv4 packet
* @ee: Extended error descriptor
* @toside: Destination side of flow
* @saddr: Address of ICMP generating node
* @in: First bytes (max 8) of original UDP message body
* @dlen: Length of the read part of original UDP message body
*/
-static void udp_send_tap_icmp4(const struct ctx *c,
+static void udp_send_tap_icmp4(const struct ctx *c, unsigned int qpair,
const struct sock_extended_err *ee,
const struct flowside *toside,
struct in_addr saddr,
@@ -453,13 +454,14 @@ static void udp_send_tap_icmp4(const struct ctx *c,
/* Try to obtain the MAC address of the generating node */
saddr_any = inany_from_v4(saddr);
fwd_neigh_mac_get(c, &saddr_any, tap_omac);
- tap_icmp4_send(c, saddr, eaddr, &msg, tap_omac, msglen);
+ tap_icmp4_send(c, qpair, saddr, eaddr, &msg, tap_omac, msglen);
}
/**
* udp_send_tap_icmp6() - Construct and send ICMPv6 to local peer
* @c: Execution context
+ * @qpair: Queue pair on which to send the ICMPv6 packet
* @ee: Extended error descriptor
* @toside: Destination side of flow
* @saddr: Address of ICMP generating node
@@ -467,7 +469,7 @@ static void udp_send_tap_icmp4(const struct ctx *c,
* @dlen: Length of the read part of original UDP message body
* @flow: IPv6 flow identifier
*/
-static void udp_send_tap_icmp6(const struct ctx *c,
+static void udp_send_tap_icmp6(const struct ctx *c, unsigned int qpair,
const struct sock_extended_err *ee,
const struct flowside *toside,
const struct in6_addr *saddr,
@@ -501,7 +503,7 @@ static void udp_send_tap_icmp6(const struct ctx *c,
/* Try to obtain the MAC address of the generating node */
fwd_neigh_mac_get(c, (union inany_addr *) saddr, tap_omac);
- tap_icmp6_send(c, saddr, eaddr, &msg, tap_omac, msglen);
+ tap_icmp6_send(c, qpair, saddr, eaddr, &msg, tap_omac, msglen);
}
/**
@@ -661,12 +663,12 @@ static int udp_sock_recverr(const struct ctx *c, int s, flow_sidx_t sidx,
if (hdr->cmsg_level == IPPROTO_IP &&
(o4 = inany_v4(&otap)) && inany_v4(&toside->eaddr)) {
dlen = MIN(dlen, ICMP4_MAX_DLEN);
- udp_send_tap_icmp4(c, ee, toside, *o4, data, dlen);
+ udp_send_tap_icmp4(c, QPAIR_DEFAULT, ee, toside, *o4, data, dlen);
return 1;
}
if (hdr->cmsg_level == IPPROTO_IPV6 && !inany_v4(&toside->eaddr)) {
- udp_send_tap_icmp6(c, ee, toside, &otap.a6, data, dlen,
+ udp_send_tap_icmp6(c, QPAIR_DEFAULT, ee, toside, &otap.a6, data, dlen,
FLOW_IDX(uflow));
return 1;
}
@@ -859,8 +861,8 @@ static void udp_buf_sock_to_tap(const struct ctx *c, int s, int n,
* @port: Our (local) port number of @s
* @now: Current timestamp
*/
-void udp_sock_fwd(const struct ctx *c, int s, uint8_t frompif,
- in_port_t port, const struct timespec *now)
+void udp_sock_fwd(const struct ctx *c, int s, uint8_t frompif, in_port_t port,
+ const struct timespec *now)
{
union sockaddr_inany src;
union inany_addr dst;
@@ -938,8 +940,8 @@ void udp_listen_sock_handler(const struct ctx *c,
* @events: epoll events bitmap
* @now: Current timestamp
*/
-void udp_sock_handler(const struct ctx *c, union epoll_ref ref,
- uint32_t events, const struct timespec *now)
+void udp_sock_handler(const struct ctx *c, union epoll_ref ref, uint32_t events,
+ const struct timespec *now)
{
struct udp_flow *uflow = udp_at_sidx(ref.flowside);
diff --git a/udp.h b/udp.h
index 03e8dc548f82..2bf788ea3775 100644
--- a/udp.h
+++ b/udp.h
@@ -9,8 +9,8 @@
void udp_portmap_clear(void);
void udp_listen_sock_handler(const struct ctx *c, union epoll_ref ref,
uint32_t events, const struct timespec *now);
-void udp_sock_handler(const struct ctx *c, union epoll_ref ref,
- uint32_t events, const struct timespec *now);
+void udp_sock_handler(const struct ctx *c, union epoll_ref ref, uint32_t events,
+ const struct timespec *now);
int udp_tap_handler(const struct ctx *c, uint8_t pif,
sa_family_t af, const void *saddr, const void *daddr,
uint8_t ttl, const struct pool *p, int idx,
diff --git a/udp_internal.h b/udp_internal.h
index 96d11cff6833..ed13c5aec8d5 100644
--- a/udp_internal.h
+++ b/udp_internal.h
@@ -28,7 +28,7 @@ size_t udp_update_hdr4(struct iphdr *ip4h, struct udp_payload_t *bp,
size_t udp_update_hdr6(struct ipv6hdr *ip6h, struct udp_payload_t *bp,
const struct flowside *toside, size_t dlen,
bool no_udp_csum);
-void udp_sock_fwd(const struct ctx *c, int s, uint8_t frompif,
- in_port_t port, const struct timespec *now);
+void udp_sock_fwd(const struct ctx *c, int s, uint8_t frompif, in_port_t port,
+ const struct timespec *now);
#endif /* UDP_INTERNAL_H */
diff --git a/udp_vu.c b/udp_vu.c
index c30dcf97698f..35e29f85a465 100644
--- a/udp_vu.c
+++ b/udp_vu.c
@@ -207,8 +207,9 @@ void udp_vu_sock_to_tap(const struct ctx *c, int s, int n, flow_sidx_t tosidx)
{
const struct flowside *toside = flowside_at_sidx(tosidx);
bool v6 = !(inany_v4(&toside->eaddr) && inany_v4(&toside->oaddr));
+ int rx_queue = QPAIR_TOGUEST_QUEUE(QPAIR_DEFAULT);
struct vu_dev *vdev = c->vdev;
- struct vu_virtq *vq = &vdev->vq[VHOST_USER_RX_QUEUE];
+ struct vu_virtq *vq = &vdev->vq[rx_queue];
int i;
for (i = 0; i < n; i++) {
diff --git a/vhost_user.c b/vhost_user.c
index 772ba97759d9..7b6295b59f3f 100644
--- a/vhost_user.c
+++ b/vhost_user.c
@@ -811,7 +811,9 @@ static bool vu_set_vring_kick_exec(struct vu_dev *vdev,
vdev->vq[idx].started = true;
- if (vdev->vq[idx].kick_fd != -1 && VHOST_USER_IS_QUEUE_TX(idx)) {
+ if (vdev->vq[idx].kick_fd != -1 &&
+ QPAIR_FROMGUEST_QUEUE(QPAIR_FROM_QUEUE(idx)) ==
+ (unsigned int)idx) {
vu_set_watch(vdev, idx);
debug("Waiting for kicks on fd: %d for vq: %d",
vdev->vq[idx].kick_fd, idx);
diff --git a/vhost_user.h b/vhost_user.h
index e806a9e54e00..b0dad5aeadf9 100644
--- a/vhost_user.h
+++ b/vhost_user.h
@@ -203,12 +203,6 @@ struct vhost_user_msg {
/* index of the RX virtqueue */
#define VHOST_USER_RX_QUEUE 0
-/* index of the TX virtqueue */
-#define VHOST_USER_TX_QUEUE 1
-
-/* in case of multiqueue, the RX and TX queues are interleaved */
-#define VHOST_USER_IS_QUEUE_TX(n) (n % 2)
-#define VHOST_USER_IS_QUEUE_RX(n) (!(n % 2))
/* Default virtio-net header for passt */
#define VU_HEADER ((struct virtio_net_hdr){ \
diff --git a/vu_common.c b/vu_common.c
index c682498fb555..ce945150a0ce 100644
--- a/vu_common.c
+++ b/vu_common.c
@@ -168,7 +168,8 @@ static void vu_handle_tx(struct vu_dev *vdev, int index,
int out_sg_count;
int count;
- ASSERT(VHOST_USER_IS_QUEUE_TX(index));
+ ASSERT(QPAIR_FROMGUEST_QUEUE(QPAIR_FROM_QUEUE(index)) ==
+ (unsigned int)index);
tap_flush_pools();
@@ -196,11 +197,12 @@ static void vu_handle_tx(struct vu_dev *vdev, int index,
data = IOV_TAIL(elem[count].out_sg, elem[count].out_num, 0);
if (IOV_DROP_HEADER(&data, struct virtio_net_hdr_mrg_rxbuf))
- tap_add_packet(vdev->context, &data, now);
+ tap_add_packet(vdev->context, QPAIR_FROM_QUEUE(index),
+ &data, now);
count++;
}
- tap_handler(vdev->context, now);
+ tap_handler(vdev->context, QPAIR_FROM_QUEUE(index), now);
if (count) {
int i;
@@ -230,28 +232,32 @@ void vu_kick_cb(struct vu_dev *vdev, union epoll_ref ref,
trace("vhost-user: got kick_data: %016"PRIx64" idx: %d",
kick_data, ref.queue);
- if (VHOST_USER_IS_QUEUE_TX(ref.queue))
+ if (QPAIR_FROMGUEST_QUEUE(QPAIR_FROM_QUEUE(ref.queue)) ==
+ (unsigned int)ref.queue)
vu_handle_tx(vdev, ref.queue, now);
}
/**
- * vu_send_single() - Send a buffer to the front-end using the RX virtqueue
- * @c: execution context
+ * vu_send_single() - Send a buffer to the front-end using a specified virtqueue
+ * @c: Execution context
+ * @qpair: Queue pair on which to send the buffer
* @buf: address of the buffer
* @size: size of the buffer
*
* Return: number of bytes sent, -1 if there is an error
*/
-int vu_send_single(const struct ctx *c, const void *buf, size_t size)
+int vu_send_single(const struct ctx *c, unsigned int qpair, const void *buf, size_t size)
{
struct vu_dev *vdev = c->vdev;
- struct vu_virtq *vq = &vdev->vq[VHOST_USER_RX_QUEUE];
struct vu_virtq_element elem[VIRTQUEUE_MAX_SIZE];
struct iovec in_sg[VIRTQUEUE_MAX_SIZE];
+ struct vu_virtq *vq;
size_t total;
int elem_cnt;
int i;
+ vq = &vdev->vq[QPAIR_TOGUEST_QUEUE(qpair)];
+
trace("vu_send_single size %zu", size);
if (!vu_queue_enabled(vq) || !vu_queue_started(vq)) {
diff --git a/vu_common.h b/vu_common.h
index 27fe7e0b3457..ddda59fda961 100644
--- a/vu_common.h
+++ b/vu_common.h
@@ -56,7 +56,8 @@ void vu_flush(const struct vu_dev *vdev, struct vu_virtq *vq,
struct vu_virtq_element *elem, int elem_cnt);
void vu_kick_cb(struct vu_dev *vdev, union epoll_ref ref,
const struct timespec *now);
-int vu_send_single(const struct ctx *c, const void *buf, size_t size);
+int vu_send_single(const struct ctx *c, unsigned int qpair, const void *buf,
+ size_t size);
void vu_pad(struct iovec *iov, size_t l2len);
#endif /* VU_COMMON_H */
--
2.51.1
^ permalink raw reply [flat|nested] 7+ messages in thread
* [PATCH v4 6/6] flow: Add queue pair tracking to flow management
2025-12-15 12:40 [PATCH v4 0/6] vhost-user: Add multiqueue support Laurent Vivier
` (4 preceding siblings ...)
2025-12-15 12:40 ` [PATCH v4 5/6] tap: Convert packet pools to per-queue-pair arrays for multiqueue Laurent Vivier
@ 2025-12-15 12:40 ` Laurent Vivier
5 siblings, 0 replies; 7+ messages in thread
From: Laurent Vivier @ 2025-12-15 12:40 UTC (permalink / raw)
To: passt-dev; +Cc: Laurent Vivier
For multiqueue support, we need to ensure packets are routed to the
correct RX queue based on which TX queue they originated from. This
requires tracking the queue pair association for each flow.
Add a qpair field to struct flow_common to store the queue pair number
for each flow (FLOW_QPAIR_INVALID if not assigned). The field uses 5
bits, allowing support for up to 31 queue pairs (index 31 is reserved
for FLOW_QPAIR_INVALID), which we verify is sufficient for
VHOST_USER_MAX_VQS via static assertion.
Introduce flow_qp() to retrieve the queue pair for a flow (returning 0
for NULL flows or flows without a valid assignment), and flow_setqp()
to assign queue pairs. Update all protocol handlers (TCP, UDP, ICMP)
and their tap handlers to accept a qpair parameter and assign it to
flows using FLOW_SETQP().
The implementation updates the queue pair assignment on every packet
received from a guest TX queue. This follows the virtio specification's requirement
for automatic receive steering: "After the driver transmitted a packet
of a flow on transmitqX, the device SHOULD cause incoming packets for
that flow to be steered to receiveqX." By tracking the most recent TX
queue for each flow, we ensure return traffic is directed to the
corresponding RX queue, maintaining flow affinity across queue pairs.
The vhost-user code now uses FLOW_QP() to select the appropriate RX
queue when sending packets, ensuring they're routed based on the
originating TX queue rather than always using queue 0.
Note that flows initiated from the host side (via sockets, for example
udp_flow_from_sock()) currently default to queue pair 0, as they don't
have an associated incoming queue to derive the assignment from.
Signed-off-by: Laurent Vivier <lvivier@redhat.com>
---
flow.c | 33 +++++++++++++++++++++++++++++++++
flow.h | 17 +++++++++++++++++
icmp.c | 31 ++++++++++++++++++-------------
icmp.h | 4 ++--
tap.c | 8 ++++----
tcp.c | 33 +++++++++++++++++++--------------
tcp_vu.c | 8 +++++---
udp.c | 29 ++++++++++++++++-------------
udp.h | 2 +-
udp_flow.c | 15 ++++++++++-----
udp_flow.h | 2 +-
udp_vu.c | 3 ++-
12 files changed, 128 insertions(+), 57 deletions(-)
diff --git a/flow.c b/flow.c
index 4f53486586cd..f38c74a5cbe5 100644
--- a/flow.c
+++ b/flow.c
@@ -402,6 +402,38 @@ void flow_epollid_register(int epollid, int epollfd)
epoll_id_to_fd[epollid] = epollfd;
}
+/**
+ * flow_qp() - Look up the queue pair a flow is bound to
+ * @f:		Flow to inspect, NULL is accepted
+ *
+ * Return: queue pair stored in @f, or QPAIR_DEFAULT if @f is NULL or still
+ *	   carries FLOW_QPAIR_INVALID
+ */
+unsigned int flow_qp(const struct flow_common *f)
+{
+	if (f && f->qpair != FLOW_QPAIR_INVALID)
+		return f->qpair;
+	return QPAIR_DEFAULT;
+}
+
+/**
+ * flow_setqp() - Set queue pair assignment for a flow
+ * @f:		Flow to update
+ * @qpair:	Queue pair number to assign, must be below FLOW_QPAIR_MAX
+ */
+void flow_setqp(struct flow_common *f, unsigned int qpair)
+{
+	ASSERT(qpair < FLOW_QPAIR_MAX);	/* FLOW_QPAIR_MAX is FLOW_QPAIR_INVALID */
+
+	if (f->qpair == qpair)	/* unchanged: skip the store and the trace */
+		return;
+
+	flow_trace((union flow *)f, "updating queue pair from %u to %u",
+		   (unsigned int)f->qpair, qpair);
+
+	f->qpair = qpair;
+}
+
/**
* flow_initiate_() - Move flow to INI, setting pif[INISIDE]
* @flow: Flow to change state
@@ -606,6 +638,7 @@ union flow *flow_alloc(void)
flow_new_entry = flow;
memset(flow, 0, sizeof(*flow));
flow_epollid_clear(&flow->f);
+ flow->f.qpair = FLOW_QPAIR_INVALID;
flow_set_state(&flow->f, FLOW_STATE_NEW);
return flow;
diff --git a/flow.h b/flow.h
index b43b0b1dd7f2..a4a1e680227c 100644
--- a/flow.h
+++ b/flow.h
@@ -179,6 +179,8 @@ int flowside_connect(const struct ctx *c, int s,
* @side[]: Information for each side of the flow
* @tap_omac: MAC address of remote endpoint as seen from the guest
* @epollid: epollfd identifier, or EPOLLFD_ID_INVALID
+ * @qpair: Queue pair number assigned to this flow
+ * (FLOW_QPAIR_INVALID if not assigned)
*/
struct flow_common {
#ifdef __GNUC__
@@ -199,6 +201,8 @@ struct flow_common {
#define EPOLLFD_ID_BITS 8
unsigned int epollid:EPOLLFD_ID_BITS;
+#define FLOW_QPAIR_BITS 5
+ unsigned int qpair:FLOW_QPAIR_BITS;
};
#define EPOLLFD_ID_DEFAULT 0
@@ -206,6 +210,12 @@ struct flow_common {
#define EPOLLFD_ID_MAX (EPOLLFD_ID_SIZE - 1)
#define EPOLLFD_ID_INVALID EPOLLFD_ID_MAX
+#define FLOW_QPAIR_NUM (1 << FLOW_QPAIR_BITS) /* values the qpair bitfield holds */
+#define FLOW_QPAIR_MAX (FLOW_QPAIR_NUM - 1) /* largest such value */
+#define FLOW_QPAIR_INVALID FLOW_QPAIR_MAX /* marks "no queue pair assigned" */
+
+static_assert(VHOST_USER_MAX_VQS <= FLOW_QPAIR_MAX * 2, "qpair too narrow");
+
#define FLOW_INDEX_BITS 17 /* 128k - 1 */
#define FLOW_MAX MAX_FROM_BITS(FLOW_INDEX_BITS)
@@ -266,6 +276,13 @@ int flow_epollfd(const struct flow_common *f);
void flow_epollid_set(struct flow_common *f, int epollid);
void flow_epollid_clear(struct flow_common *f);
void flow_epollid_register(int epollid, int epollfd);
+unsigned int flow_qp(const struct flow_common *f);
+#define FLOW_QP(flow_) \
+ (flow_qp(&(flow_)->f))
+void flow_setqp(struct flow_common *f, unsigned int qpair);
+#define FLOW_SETQP(flow_, _qpair) \
+ (flow_setqp(&(flow_)->f, _qpair))
+
void flow_defer_handler(const struct ctx *c, const struct timespec *now);
int flow_migrate_source_early(struct ctx *c, const struct migrate_stage *stage,
int fd);
diff --git a/icmp.c b/icmp.c
index fbdd0bbdf546..b80c795c8d1c 100644
--- a/icmp.c
+++ b/icmp.c
@@ -132,13 +132,13 @@ void icmp_sock_handler(const struct ctx *c, union epoll_ref ref)
const struct in_addr *daddr = inany_v4(&ini->eaddr);
ASSERT(saddr && daddr); /* Must have IPv4 addresses */
- tap_icmp4_send(c, QPAIR_DEFAULT, *saddr, *daddr, buf,
+ tap_icmp4_send(c, FLOW_QP(pingf), *saddr, *daddr, buf,
pingf->f.tap_omac, n);
} else if (pingf->f.type == FLOW_PING6) {
const struct in6_addr *saddr = &ini->oaddr.a6;
const struct in6_addr *daddr = &ini->eaddr.a6;
- tap_icmp6_send(c, QPAIR_DEFAULT, saddr, daddr, buf,
+ tap_icmp6_send(c, FLOW_QP(pingf), saddr, daddr, buf,
pingf->f.tap_omac, n);
}
return;
@@ -170,7 +170,7 @@ static void icmp_ping_close(const struct ctx *c,
*
* Return: newly opened ping flow, or NULL on failure
*/
-static struct icmp_ping_flow *icmp_ping_new(const struct ctx *c,
+static struct icmp_ping_flow *icmp_ping_new(const struct ctx *c, unsigned int qpair,
sa_family_t af, uint16_t id,
const void *saddr, const void *daddr)
{
@@ -197,6 +197,7 @@ static struct icmp_ping_flow *icmp_ping_new(const struct ctx *c,
}
pingf = FLOW_SET_TYPE(flow, flowtype, ping);
+ FLOW_SETQP(pingf, qpair);
pingf->seq = -1;
@@ -238,17 +239,18 @@ cancel:
/**
* icmp_tap_handler() - Handle packets from tap
- * @c: Execution context
- * @pif: pif on which the packet is arriving
- * @af: Address family, AF_INET or AF_INET6
- * @saddr: Source address
- * @daddr: Destination address
- * @data: Single packet with ICMP/ICMPv6 header
- * @now: Current timestamp
+ * @c: Execution context
+ * @qpair: Queue pair
+ * @pif: pif on which the packet is arriving
+ * @af: Address family, AF_INET or AF_INET6
+ * @saddr: Source address
+ * @daddr: Destination address
+ * @data: Single packet with ICMP/ICMPv6 header
+ * @now: Current timestamp
*
* Return: count of consumed packets (always 1, even if malformed)
*/
-int icmp_tap_handler(const struct ctx *c, uint8_t pif, sa_family_t af,
+int icmp_tap_handler(const struct ctx *c, unsigned int qpair, uint8_t pif, sa_family_t af,
const void *saddr, const void *daddr,
struct iov_tail *data, const struct timespec *now)
{
@@ -304,10 +306,13 @@ int icmp_tap_handler(const struct ctx *c, uint8_t pif, sa_family_t af,
flow = flow_at_sidx(flow_lookup_af(c, proto, PIF_TAP,
af, saddr, daddr, id, id));
- if (flow)
+ if (flow) {
pingf = &flow->ping;
- else if (!(pingf = icmp_ping_new(c, af, id, saddr, daddr)))
+ FLOW_SETQP(pingf, qpair); /* XXX if qpair change, update epollfd */
+ } else if (!(pingf = icmp_ping_new(c, qpair, af, id, saddr, daddr))) {
return 1;
+ }
+
tgt = &pingf->f.side[TGTSIDE];
diff --git a/icmp.h b/icmp.h
index 1a0e6205f087..7b9982529fd1 100644
--- a/icmp.h
+++ b/icmp.h
@@ -10,8 +10,8 @@ struct ctx;
struct icmp_ping_flow;
void icmp_sock_handler(const struct ctx *c, union epoll_ref ref);
-int icmp_tap_handler(const struct ctx *c, uint8_t pif, sa_family_t af,
- const void *saddr, const void *daddr,
+int icmp_tap_handler(const struct ctx *c, unsigned int qpair, uint8_t pif,
+ sa_family_t af, const void *saddr, const void *daddr,
struct iov_tail *data, const struct timespec *now);
void icmp_init(void);
diff --git a/tap.c b/tap.c
index 332a1187aa06..f32e22f32203 100644
--- a/tap.c
+++ b/tap.c
@@ -796,7 +796,7 @@ resume:
tap_packet_debug(iph, NULL, NULL, 0, NULL, 1);
- icmp_tap_handler(c, PIF_TAP, AF_INET,
+ icmp_tap_handler(c, qpair, PIF_TAP, AF_INET,
&iph->saddr, &iph->daddr,
&data, now);
continue;
@@ -880,7 +880,7 @@ append:
if (c->no_udp)
continue;
for (k = 0; k < p->count; )
- k += udp_tap_handler(c, PIF_TAP, AF_INET,
+ k += udp_tap_handler(c, qpair, PIF_TAP, AF_INET,
&seq->saddr, &seq->daddr,
seq->ttl, p, k, now);
}
@@ -982,7 +982,7 @@ resume:
tap_packet_debug(NULL, ip6h, NULL, proto, NULL, 1);
- icmp_tap_handler(c, PIF_TAP, AF_INET6,
+ icmp_tap_handler(c, qpair, PIF_TAP, AF_INET6,
saddr, daddr, &data, now);
continue;
}
@@ -1071,7 +1071,7 @@ append:
if (c->no_udp)
continue;
for (k = 0; k < p->count; )
- k += udp_tap_handler(c, PIF_TAP, AF_INET6,
+ k += udp_tap_handler(c, qpair, PIF_TAP, AF_INET6,
&seq->saddr, &seq->daddr,
seq->hop_limit, p, k, now);
}
diff --git a/tcp.c b/tcp.c
index 9b827d4a9366..12153c2e0a02 100644
--- a/tcp.c
+++ b/tcp.c
@@ -1615,21 +1615,23 @@ static void tcp_bind_outbound(const struct ctx *c,
/**
* tcp_conn_from_tap() - Handle connection request (SYN segment) from tap
- * @c: Execution context
- * @af: Address family, AF_INET or AF_INET6
- * @saddr: Source address, pointer to in_addr or in6_addr
- * @daddr: Destination address, pointer to in_addr or in6_addr
- * @th: TCP header from tap: caller MUST ensure it's there
- * @opts: Pointer to start of options
- * @optlen: Bytes in options: caller MUST ensure available length
- * @now: Current timestamp
+ * @c: Execution context
+ * @qpair: Queue pair for the flow
+ * @af: Address family, AF_INET or AF_INET6
+ * @saddr: Source address, pointer to in_addr or in6_addr
+ * @daddr: Destination address, pointer to in_addr or in6_addr
+ * @th: TCP header from tap: caller MUST ensure it's there
+ * @opts: Pointer to start of options
+ * @optlen: Bytes in options: caller MUST ensure available length
+ * @now: Current timestamp
*
* #syscalls:vu getsockname
*/
-static void tcp_conn_from_tap(const struct ctx *c, sa_family_t af,
- const void *saddr, const void *daddr,
- const struct tcphdr *th, const char *opts,
- size_t optlen, const struct timespec *now)
+static void tcp_conn_from_tap(const struct ctx *c, unsigned int qpair,
+ sa_family_t af, const void *saddr,
+ const void *daddr, const struct tcphdr *th,
+ const char *opts, size_t optlen,
+ const struct timespec *now)
{
in_port_t srcport = ntohs(th->source);
in_port_t dstport = ntohs(th->dest);
@@ -1740,6 +1742,7 @@ static void tcp_conn_from_tap(const struct ctx *c, sa_family_t af,
conn_event(c, conn, TAP_SYN_ACK_SENT);
}
+ FLOW_SETQP(conn, qpair);
tcp_epoll_ctl(c, conn);
if (c->mode == MODE_VU) { /* To rebind to same oport after migration */
@@ -2176,7 +2179,6 @@ static void tcp_rst_no_conn(const struct ctx *c, unsigned int qpair, int af,
/**
* tcp_tap_handler() - Handle packets from tap and state transitions
* @c: Execution context
- * @qpair: Queue pair on which to send packets
* @pif: pif on which the packet is arriving
* @af: Address family, AF_INET or AF_INET6
* @saddr: Source address
@@ -2228,7 +2230,7 @@ int tcp_tap_handler(const struct ctx *c, unsigned int qpair, uint8_t pif,
/* New connection from tap */
if (!flow) {
if (opts && th->syn && !th->ack)
- tcp_conn_from_tap(c, af, saddr, daddr, th,
+ tcp_conn_from_tap(c, qpair, af, saddr, daddr, th,
opts, optlen, now);
else
tcp_rst_no_conn(c, qpair, af, saddr, daddr, flow_lbl, th,
@@ -2240,6 +2242,9 @@ int tcp_tap_handler(const struct ctx *c, unsigned int qpair, uint8_t pif,
ASSERT(pif_at_sidx(sidx) == PIF_TAP);
conn = &flow->tcp;
+ /* update queue pair */
+ FLOW_SETQP(flow, qpair);
+
flow_trace(conn, "packet length %zu from tap", l4len);
if (th->rst) {
diff --git a/tcp_vu.c b/tcp_vu.c
index db9db78aaaed..3e7d85798aea 100644
--- a/tcp_vu.c
+++ b/tcp_vu.c
@@ -71,14 +71,15 @@ static size_t tcp_vu_hdrlen(bool v6)
int tcp_vu_send_flag(const struct ctx *c, struct tcp_tap_conn *conn, int flags)
{
struct vu_dev *vdev = c->vdev;
- struct vu_virtq *vq = &vdev->vq[VHOST_USER_RX_QUEUE];
- size_t optlen, hdrlen;
+ int rx_queue = FLOW_QP(conn) * 2;
+ struct vu_virtq *vq = &vdev->vq[rx_queue];
struct vu_virtq_element flags_elem[2];
struct ipv6hdr *ip6h = NULL;
struct iphdr *ip4h = NULL;
struct iovec flags_iov[2];
struct tcp_syn_opts *opts;
struct iov_tail payload;
+ size_t optlen, hdrlen;
struct tcphdr *th;
struct ethhdr *eh;
uint32_t seq;
@@ -353,7 +354,8 @@ int tcp_vu_data_from_sock(const struct ctx *c, struct tcp_tap_conn *conn)
{
uint32_t wnd_scaled = conn->wnd_from_tap << conn->ws_from_tap;
struct vu_dev *vdev = c->vdev;
- struct vu_virtq *vq = &vdev->vq[VHOST_USER_RX_QUEUE];
+ int rx_queue = FLOW_QP(conn) * 2;
+ struct vu_virtq *vq = &vdev->vq[rx_queue];
ssize_t len, previous_dlen;
int i, iov_cnt, head_cnt;
size_t hdrlen, fillsize;
diff --git a/udp.c b/udp.c
index fd584a835076..b4f6f5f74c20 100644
--- a/udp.c
+++ b/udp.c
@@ -663,12 +663,14 @@ static int udp_sock_recverr(const struct ctx *c, int s, flow_sidx_t sidx,
if (hdr->cmsg_level == IPPROTO_IP &&
(o4 = inany_v4(&otap)) && inany_v4(&toside->eaddr)) {
dlen = MIN(dlen, ICMP4_MAX_DLEN);
- udp_send_tap_icmp4(c, QPAIR_DEFAULT, ee, toside, *o4, data, dlen);
+ udp_send_tap_icmp4(c, FLOW_QP(uflow), ee, toside,
+ *o4, data, dlen);
return 1;
}
if (hdr->cmsg_level == IPPROTO_IPV6 && !inany_v4(&toside->eaddr)) {
- udp_send_tap_icmp6(c, QPAIR_DEFAULT, ee, toside, &otap.a6, data, dlen,
+ udp_send_tap_icmp6(c, FLOW_QP(uflow), ee,
+ toside, &otap.a6, data, dlen,
FLOW_IDX(uflow));
return 1;
}
@@ -996,21 +998,22 @@ fail:
/**
* udp_tap_handler() - Handle packets from tap
- * @c: Execution context
- * @pif: pif on which the packet is arriving
- * @af: Address family, AF_INET or AF_INET6
- * @saddr: Source address
- * @daddr: Destination address
- * @ttl: TTL or hop limit for packets to be sent in this call
- * @p: Pool of UDP packets, with UDP headers
- * @idx: Index of first packet to process
- * @now: Current timestamp
+ * @c: Execution context
+ * @qpair: Queue pair
+ * @pif: pif on which the packet is arriving
+ * @af: Address family, AF_INET or AF_INET6
+ * @saddr: Source address
+ * @daddr: Destination address
+ * @ttl: TTL or hop limit for packets to be sent in this call
+ * @p: Pool of UDP packets, with UDP headers
+ * @idx: Index of first packet to process
+ * @now: Current timestamp
*
* Return: count of consumed packets
*
* #syscalls sendmmsg
*/
-int udp_tap_handler(const struct ctx *c, uint8_t pif,
+int udp_tap_handler(const struct ctx *c, unsigned int qpair, uint8_t pif,
sa_family_t af, const void *saddr, const void *daddr,
uint8_t ttl, const struct pool *p, int idx,
const struct timespec *now)
@@ -1043,7 +1046,7 @@ int udp_tap_handler(const struct ctx *c, uint8_t pif,
src = ntohs(uh->source);
dst = ntohs(uh->dest);
- tosidx = udp_flow_from_tap(c, pif, af, saddr, daddr, src, dst, now);
+ tosidx = udp_flow_from_tap(c, qpair, pif, af, saddr, daddr, src, dst, now);
if (!(uflow = udp_at_sidx(tosidx))) {
char sstr[INET6_ADDRSTRLEN], dstr[INET6_ADDRSTRLEN];
diff --git a/udp.h b/udp.h
index 2bf788ea3775..2f17c357e906 100644
--- a/udp.h
+++ b/udp.h
@@ -11,7 +11,7 @@ void udp_listen_sock_handler(const struct ctx *c, union epoll_ref ref,
uint32_t events, const struct timespec *now);
void udp_sock_handler(const struct ctx *c, union epoll_ref ref, uint32_t events,
const struct timespec *now);
-int udp_tap_handler(const struct ctx *c, uint8_t pif,
+int udp_tap_handler(const struct ctx *c, unsigned int qpair, uint8_t pif,
sa_family_t af, const void *saddr, const void *daddr,
uint8_t ttl, const struct pool *p, int idx,
const struct timespec *now);
diff --git a/udp_flow.c b/udp_flow.c
index 8907f2f72741..c30939090b27 100644
--- a/udp_flow.c
+++ b/udp_flow.c
@@ -146,8 +146,8 @@ static int udp_flow_sock(const struct ctx *c,
*
* #syscalls getsockname
*/
-static flow_sidx_t udp_flow_new(const struct ctx *c, union flow *flow,
- const struct timespec *now)
+static flow_sidx_t udp_flow_new(const struct ctx *c, unsigned int qpair,
+ union flow *flow, const struct timespec *now)
{
struct udp_flow *uflow = NULL;
const struct flowside *tgt;
@@ -160,6 +160,7 @@ static flow_sidx_t udp_flow_new(const struct ctx *c, union flow *flow,
uflow->ts = now->tv_sec;
uflow->s[INISIDE] = uflow->s[TGTSIDE] = -1;
uflow->ttl[INISIDE] = uflow->ttl[TGTSIDE] = 0;
+ FLOW_SETQP(uflow, qpair);
flow_foreach_sidei(sidei) {
if (pif_is_socket(uflow->f.pif[sidei]))
@@ -260,23 +261,25 @@ flow_sidx_t udp_flow_from_sock(const struct ctx *c, uint8_t pif,
return FLOW_SIDX_NONE;
}
- return udp_flow_new(c, flow, now);
+ return udp_flow_new(c, QPAIR_DEFAULT, flow, now);
}
/**
* udp_flow_from_tap() - Find or create UDP flow for tap packets
* @c: Execution context
+ * @qpair: Queue pair for the flow
* @pif: pif on which the packet is arriving
* @af: Address family, AF_INET or AF_INET6
* @saddr: Source address on guest side
* @daddr: Destination address guest side
* @srcport: Source port on guest side
* @dstport: Destination port on guest side
+ * @now: Current timestamp
*
* Return: sidx for the destination side of the flow for this packet, or
* FLOW_SIDX_NONE if we couldn't find or create a flow.
*/
-flow_sidx_t udp_flow_from_tap(const struct ctx *c,
+flow_sidx_t udp_flow_from_tap(const struct ctx *c, unsigned int qpair,
uint8_t pif, sa_family_t af,
const void *saddr, const void *daddr,
in_port_t srcport, in_port_t dstport,
@@ -293,6 +296,8 @@ flow_sidx_t udp_flow_from_tap(const struct ctx *c,
srcport, dstport);
if ((uflow = udp_at_sidx(sidx))) {
uflow->ts = now->tv_sec;
+ /* update qpair */
+ FLOW_SETQP(uflow, qpair); /* if qpair changes, update epollfd */
return flow_sidx_opposite(sidx);
}
@@ -316,7 +321,7 @@ flow_sidx_t udp_flow_from_tap(const struct ctx *c,
return FLOW_SIDX_NONE;
}
- return udp_flow_new(c, flow, now);
+ return udp_flow_new(c, qpair, flow, now);
}
/**
diff --git a/udp_flow.h b/udp_flow.h
index 4c528e95ca66..03e6ecdcbaf2 100644
--- a/udp_flow.h
+++ b/udp_flow.h
@@ -36,7 +36,7 @@ flow_sidx_t udp_flow_from_sock(const struct ctx *c, uint8_t pif,
const union inany_addr *dst, in_port_t port,
const union sockaddr_inany *s_in,
const struct timespec *now);
-flow_sidx_t udp_flow_from_tap(const struct ctx *c,
+flow_sidx_t udp_flow_from_tap(const struct ctx *c, unsigned int qpair,
uint8_t pif, sa_family_t af,
const void *saddr, const void *daddr,
in_port_t srcport, in_port_t dstport,
diff --git a/udp_vu.c b/udp_vu.c
index 35e29f85a465..019262564a60 100644
--- a/udp_vu.c
+++ b/udp_vu.c
@@ -206,8 +206,9 @@ static void udp_vu_csum(const struct flowside *toside, int iov_used)
void udp_vu_sock_to_tap(const struct ctx *c, int s, int n, flow_sidx_t tosidx)
{
const struct flowside *toside = flowside_at_sidx(tosidx);
+ const struct udp_flow *uflow = udp_at_sidx(tosidx);
bool v6 = !(inany_v4(&toside->eaddr) && inany_v4(&toside->oaddr));
- int rx_queue = QPAIR_TOGUEST_QUEUE(QPAIR_DEFAULT);
+ int rx_queue = QPAIR_TOGUEST_QUEUE(FLOW_QP(uflow));
struct vu_dev *vdev = c->vdev;
struct vu_virtq *vq = &vdev->vq[rx_queue];
int i;
--
2.51.1
^ permalink raw reply [flat|nested] 7+ messages in thread