From: David Gibson <david@gibson.dropbear.id.au>
To: passt-dev@passt.top, Stefano Brivio <sbrivio@redhat.com>
Cc: David Gibson <david@gibson.dropbear.id.au>
Subject: [PATCH 2/3] packet: Don't have struct pool specify its buffer
Date: Fri, 13 Dec 2024 23:01:55 +1100 [thread overview]
Message-ID: <20241213120156.4123972-3-david@gibson.dropbear.id.au> (raw)
In-Reply-To: <20241213120156.4123972-1-david@gibson.dropbear.id.au>
struct pool, which represents a batch of packets includes values giving
the buffer in which all the packets lie - or for vhost_user a link to the
vu_dev_region array in which the packets sit. Originally that made sense
because we stored each packet as an offset and length within that buffer.
However dd143e389 ("packet: replace struct desc by struct iovec") replaced
the offset and length with a struct iovec which can directly reference a
packet anywhere in memory. This means we no longer need the buffer
reference to interpret packets from the pool. So there's really no need
to check where the packet sits. We can remove the buf reference and all
checks associated with it. As a bonus this removes the special case for
vhost-user.
Similarly the old representation used a 16-bit length, so there were some
checks that packets didn't exceed that. That's also no longer necessary
with the struct iovec which uses a size_t length.
I think under an unlikely set of circumstances it might have been possible
to hit that 16-bit limit for a legitimate packet: other parts of the code
place a limit of 65535 bytes on the L2 frame, however that doesn't include
the length tag used by the qemu socket protocol. That tag *is* included in
the packet as stored in the pool, however, meaning we could get a 65539
byte packet at this level.
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
---
packet.c | 60 ----------------------------------------------------
packet.h | 36 +++++++++----------------------
tap.c | 45 +++++++++++++--------------------------
tap.h | 1 -
vhost_user.c | 2 --
vu_common.c | 28 ------------------------
6 files changed, 25 insertions(+), 147 deletions(-)
diff --git a/packet.c b/packet.c
index 03a11e6a..5bfa7304 100644
--- a/packet.c
+++ b/packet.c
@@ -22,46 +22,6 @@
#include "util.h"
#include "log.h"
-/**
- * packet_check_range() - Check if a packet memory range is valid
- * @p: Packet pool
- * @offset: Offset of data range in packet descriptor
- * @len: Length of desired data range
- * @start: Start of the packet descriptor
- * @func: For tracing: name of calling function
- * @line: For tracing: caller line of function call
- *
- * Return: 0 if the range is valid, -1 otherwise
- */
-static int packet_check_range(const struct pool *p, size_t offset, size_t len,
- const char *start, const char *func, int line)
-{
- if (p->buf_size == 0) {
- int ret;
-
- ret = vu_packet_check_range((void *)p->buf, offset, len, start);
-
- if (ret == -1)
- trace("cannot find region, %s:%i", func, line);
-
- return ret;
- }
-
- if (start < p->buf) {
- trace("packet start %p before buffer start %p, "
- "%s:%i", (void *)start, (void *)p->buf, func, line);
- return -1;
- }
-
- if (start + len + offset > p->buf + p->buf_size) {
- trace("packet offset plus length %zu from size %zu, "
- "%s:%i", start - p->buf + len + offset,
- p->buf_size, func, line);
- return -1;
- }
-
- return 0;
-}
/**
* packet_add_do() - Add data as packet descriptor to given pool
* @p: Existing pool
@@ -81,14 +41,6 @@ void packet_add_do(struct pool *p, size_t len, const char *start,
return;
}
- if (packet_check_range(p, 0, len, start, func, line))
- return;
-
- if (len > UINT16_MAX) {
- trace("add packet length %zu, %s:%i", len, func, line);
- return;
- }
-
p->pkt[idx].iov_base = (void *)start;
p->pkt[idx].iov_len = len;
@@ -118,14 +70,6 @@ void *packet_get_do(const struct pool *p, size_t idx, size_t offset,
return NULL;
}
- if (len > UINT16_MAX) {
- if (func) {
- trace("packet data length %zu, %s:%i",
- len, func, line);
- }
- return NULL;
- }
-
if (len + offset > p->pkt[idx].iov_len) {
if (func) {
trace("data length %zu, offset %zu from length %zu, "
@@ -135,10 +79,6 @@ void *packet_get_do(const struct pool *p, size_t idx, size_t offset,
return NULL;
}
- if (packet_check_range(p, offset, len, p->pkt[idx].iov_base,
- func, line))
- return NULL;
-
if (left)
*left = p->pkt[idx].iov_len - offset - len;
diff --git a/packet.h b/packet.h
index 85ee5508..98eb8812 100644
--- a/packet.h
+++ b/packet.h
@@ -7,25 +7,17 @@
#define PACKET_H
/**
- * struct pool - Generic pool of packets stored in a buffer
- * @buf: Buffer storing packet descriptors,
- * a struct vu_dev_region array for passt vhost-user mode
- * @buf_size: Total size of buffer,
- * 0 for passt vhost-user mode
+ * struct pool - Generic pool of packets stored in nmemory
* @size: Number of usable descriptors for the pool
* @count: Number of used descriptors for the pool
* @pkt: Descriptors: see macros below
*/
struct pool {
- char *buf;
- size_t buf_size;
size_t size;
size_t count;
struct iovec pkt[];
};
-int vu_packet_check_range(void *buf, size_t offset, size_t len,
- const char *start);
void packet_add_do(struct pool *p, size_t len, const char *start,
const char *func, int line);
void *packet_get_do(const struct pool *p, const size_t idx,
@@ -42,35 +34,27 @@ void pool_flush(struct pool *p);
#define packet_get_try(p, idx, offset, len, left) \
packet_get_do(p, idx, offset, len, left, NULL, 0)
-#define PACKET_POOL_DECL(_name, _size, _buf) \
+#define PACKET_POOL_DECL(_name, _size) \
struct _name ## _t { \
- char *buf; \
- size_t buf_size; \
size_t size; \
size_t count; \
struct iovec pkt[_size]; \
}
-#define PACKET_POOL_INIT_NOCAST(_size, _buf, _buf_size) \
+#define PACKET_POOL_INIT_NOCAST(_size) \
{ \
- .buf_size = _buf_size, \
- .buf = _buf, \
.size = _size, \
}
-#define PACKET_POOL(name, size, buf, buf_size) \
- PACKET_POOL_DECL(name, size, buf) name = \
- PACKET_POOL_INIT_NOCAST(size, buf, buf_size)
+#define PACKET_POOL(name, size) \
+ PACKET_POOL_DECL(name, size) name = \
+ PACKET_POOL_INIT_NOCAST(size)
-#define PACKET_INIT(name, size, buf, buf_size) \
- (struct name ## _t) PACKET_POOL_INIT_NOCAST(size, buf, buf_size)
+#define PACKET_INIT(name, size) \
+ (struct name ## _t) PACKET_POOL_INIT_NOCAST(size)
-#define PACKET_POOL_NOINIT(name, size, buf) \
- PACKET_POOL_DECL(name, size, buf) name ## _storage; \
- static struct pool *name = (struct pool *)&name ## _storage
-
-#define PACKET_POOL_P(name, size, buf, buf_size) \
- PACKET_POOL(name ## _storage, size, buf, buf_size); \
+#define PACKET_POOL_P(name, size) \
+ PACKET_POOL(name ## _storage, size); \
struct pool *name = (struct pool *)&name ## _storage
#endif /* PACKET_H */
diff --git a/tap.c b/tap.c
index cd32a901..68231f09 100644
--- a/tap.c
+++ b/tap.c
@@ -62,8 +62,8 @@
#include "vu_common.h"
/* IPv4 (plus ARP) and IPv6 message batches from tap/guest to IP handlers */
-static PACKET_POOL_NOINIT(pool_tap4, TAP_MSGS, pkt_buf);
-static PACKET_POOL_NOINIT(pool_tap6, TAP_MSGS, pkt_buf);
+static PACKET_POOL_P(pool_tap4, TAP_MSGS);
+static PACKET_POOL_P(pool_tap6, TAP_MSGS);
#define TAP_SEQS 128 /* Different L4 tuples in one batch */
#define FRAGMENT_MSG_RATE 10 /* # seconds between fragment warnings */
@@ -462,7 +462,7 @@ void eth_update_mac(struct ethhdr *eh,
memcpy(eh->h_source, eth_s, sizeof(eh->h_source));
}
-PACKET_POOL_DECL(pool_l4, UIO_MAXIOV, pkt_buf);
+PACKET_POOL_DECL(pool_l4, UIO_MAXIOV);
/**
* struct l4_seq4_t - Message sequence for one protocol handler call, IPv4
@@ -618,7 +618,7 @@ resume:
if (!eh)
continue;
if (ntohs(eh->h_proto) == ETH_P_ARP) {
- PACKET_POOL_P(pkt, 1, in->buf, in->buf_size);
+ PACKET_POOL_P(pkt, 1);
packet_add(pkt, l2len, (char *)eh);
arp(c, pkt);
@@ -658,7 +658,7 @@ resume:
continue;
if (iph->protocol == IPPROTO_ICMP) {
- PACKET_POOL_P(pkt, 1, in->buf, in->buf_size);
+ PACKET_POOL_P(pkt, 1);
if (c->no_icmp)
continue;
@@ -677,7 +677,7 @@ resume:
continue;
if (iph->protocol == IPPROTO_UDP) {
- PACKET_POOL_P(pkt, 1, in->buf, in->buf_size);
+ PACKET_POOL_P(pkt, 1);
packet_add(pkt, l2len, (char *)eh);
if (dhcp(c, pkt))
@@ -829,7 +829,7 @@ resume:
}
if (proto == IPPROTO_ICMPV6) {
- PACKET_POOL_P(pkt, 1, in->buf, in->buf_size);
+ PACKET_POOL_P(pkt, 1);
if (c->no_icmp)
continue;
@@ -854,7 +854,7 @@ resume:
uh = (struct udphdr *)l4h;
if (proto == IPPROTO_UDP) {
- PACKET_POOL_P(pkt, 1, in->buf, in->buf_size);
+ PACKET_POOL_P(pkt, 1);
packet_add(pkt, l4len, l4h);
@@ -1381,36 +1381,21 @@ static void tap_sock_tun_init(struct ctx *c)
}
/**
- * tap_sock_update_pool() - Set the buffer base and size for the pool of packets
- * @base: Buffer base
- * @size Buffer size
+ * tap_backend_init() - Create and set up AF_UNIX socket or
+ * tuntap file descriptor
+ * @c: Execution context
*/
-void tap_sock_update_pool(void *base, size_t size)
+void tap_backend_init(struct ctx *c)
{
int i;
- pool_tap4_storage = PACKET_INIT(pool_tap4, TAP_MSGS, base, size);
- pool_tap6_storage = PACKET_INIT(pool_tap6, TAP_MSGS, base, size);
-
for (i = 0; i < TAP_SEQS; i++) {
- tap4_l4[i].p = PACKET_INIT(pool_l4, UIO_MAXIOV, base, size);
- tap6_l4[i].p = PACKET_INIT(pool_l4, UIO_MAXIOV, base, size);
+ tap4_l4[i].p = PACKET_INIT(pool_l4, UIO_MAXIOV);
+ tap6_l4[i].p = PACKET_INIT(pool_l4, UIO_MAXIOV);
}
-}
-/**
- * tap_backend_init() - Create and set up AF_UNIX socket or
- * tuntap file descriptor
- * @c: Execution context
- */
-void tap_backend_init(struct ctx *c)
-{
- if (c->mode == MODE_VU) {
- tap_sock_update_pool(NULL, 0);
+ if (c->mode == MODE_VU)
vu_init(c);
- } else {
- tap_sock_update_pool(pkt_buf, sizeof(pkt_buf));
- }
if (c->fd_tap != -1) { /* Passed as --fd */
ASSERT(c->one_off);
diff --git a/tap.h b/tap.h
index dfbd8b9e..9731f70c 100644
--- a/tap.h
+++ b/tap.h
@@ -70,7 +70,6 @@ void tap_handler_passt(struct ctx *c, uint32_t events,
const struct timespec *now);
int tap_sock_unix_open(char *sock_path);
void tap_sock_reset(struct ctx *c);
-void tap_sock_update_pool(void *base, size_t size);
void tap_backend_init(struct ctx *c);
void tap_flush_pools(void);
void tap_handler(struct ctx *c, const struct timespec *now);
diff --git a/vhost_user.c b/vhost_user.c
index 4b8558fa..77d36766 100644
--- a/vhost_user.c
+++ b/vhost_user.c
@@ -494,8 +494,6 @@ static bool vu_set_mem_table_exec(struct vu_dev *vdev,
ASSERT(vdev->nregions < VHOST_USER_MAX_RAM_SLOTS - 1);
vdev->regions[vdev->nregions].mmap_addr = 0;
- tap_sock_update_pool(vdev->regions, 0);
-
return false;
}
diff --git a/vu_common.c b/vu_common.c
index 299b5a32..bb794193 100644
--- a/vu_common.c
+++ b/vu_common.c
@@ -18,34 +18,6 @@
#include "pcap.h"
#include "vu_common.h"
-/**
- * vu_packet_check_range() - Check if a given memory zone is contained in
- * a mapped guest memory region
- * @buf: Array of the available memory regions
- * @offset: Offset of data range in packet descriptor
- * @size: Length of desired data range
- * @start: Start of the packet descriptor
- *
- * Return: 0 if the zone is in a mapped memory region, -1 otherwise
- */
-int vu_packet_check_range(void *buf, size_t offset, size_t len,
- const char *start)
-{
- struct vu_dev_region *dev_region;
-
- for (dev_region = buf; dev_region->mmap_addr; dev_region++) {
- /* NOLINTNEXTLINE(performance-no-int-to-ptr) */
- char *m = (char *)(uintptr_t)dev_region->mmap_addr;
-
- if (m <= start &&
- start + offset + len <= m + dev_region->mmap_offset +
- dev_region->size)
- return 0;
- }
-
- return -1;
-}
-
/**
* vu_init_elem() - initialize an array of virtqueue elements with 1 iov in each
* @elem: Array of virtqueue elements to initialize
--
@@ -18,34 +18,6 @@
#include "pcap.h"
#include "vu_common.h"
-/**
- * vu_packet_check_range() - Check if a given memory zone is contained in
- * a mapped guest memory region
- * @buf: Array of the available memory regions
- * @offset: Offset of data range in packet descriptor
- * @size: Length of desired data range
- * @start: Start of the packet descriptor
- *
- * Return: 0 if the zone is in a mapped memory region, -1 otherwise
- */
-int vu_packet_check_range(void *buf, size_t offset, size_t len,
- const char *start)
-{
- struct vu_dev_region *dev_region;
-
- for (dev_region = buf; dev_region->mmap_addr; dev_region++) {
- /* NOLINTNEXTLINE(performance-no-int-to-ptr) */
- char *m = (char *)(uintptr_t)dev_region->mmap_addr;
-
- if (m <= start &&
- start + offset + len <= m + dev_region->mmap_offset +
- dev_region->size)
- return 0;
- }
-
- return -1;
-}
-
/**
* vu_init_elem() - initialize an array of virtqueue elements with 1 iov in each
* @elem: Array of virtqueue elements to initialize
--
2.47.1
next prev parent reply other threads:[~2024-12-13 12:02 UTC|newest]
Thread overview: 12+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-12-13 12:01 [PATCH 0/3] Cleanups to packet pool handling and sizing David Gibson
2024-12-13 12:01 ` [PATCH 1/3] packet: Use flexible array member in struct pool David Gibson
2024-12-13 12:01 ` David Gibson [this message]
2024-12-19 9:00 ` [PATCH 2/3] packet: Don't have struct pool specify its buffer Stefano Brivio
2024-12-20 0:59 ` David Gibson
2024-12-20 9:51 ` Stefano Brivio
2024-12-21 6:59 ` David Gibson
2024-12-13 12:01 ` [PATCH 3/3] tap: Don't size pool_tap[46] for the maximum number of packets David Gibson
2024-12-19 9:00 ` Stefano Brivio
2024-12-20 1:13 ` David Gibson
2024-12-20 9:51 ` Stefano Brivio
2024-12-21 7:00 ` David Gibson
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20241213120156.4123972-3-david@gibson.dropbear.id.au \
--to=david@gibson.dropbear.id.au \
--cc=passt-dev@passt.top \
--cc=sbrivio@redhat.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
Code repositories for project(s) associated with this public inbox
https://passt.top/passt
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for IMAP folder(s).