From: David Gibson <david@gibson.dropbear.id.au>
To: Stefano Brivio <sbrivio@redhat.com>, passt-dev@passt.top
Cc: David Gibson <david@gibson.dropbear.id.au>
Subject: [PATCH v2 11/12] tap: Don't size pool_tap[46] for the maximum number of packets
Date: Fri, 20 Dec 2024 19:35:34 +1100 [thread overview]
Message-ID: <20241220083535.1372523-12-david@gibson.dropbear.id.au> (raw)
In-Reply-To: <20241220083535.1372523-1-david@gibson.dropbear.id.au>
Currently we attempt to size pool_tap[46] so they have room for the maximum
possible number of packets that could fit in pkt_buf, TAP_MSGS. However,
the calculation isn't quite correct: TAP_MSGS is based on ETH_ZLEN (60) as
the minimum possible L2 frame size. But, we don't enforce that L2 frames
are at least ETH_ZLEN when we receive them from the tap backend, and since
we're dealing with virtual interfaces we don't have the physical Ethernet
limitations requiring that length. Indeed, it is possible to generate a
legitimate frame smaller than that (e.g. a zero-payload UDP/IPv4 frame on
the 'pasta' backend is only 42 bytes long).
It's also unclear if this limit is sufficient for vhost-user, which isn't
limited by the size of pkt_buf as the other modes are.
We could attempt to correct the calculation, but that would leave us with
even larger arrays, which in practice rarely accumulate more than a handful
of packets. So, instead, put an arbitrary cap on the number of packets we
can put in a batch, and if we run out of space, process and flush the
batch.
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
---
packet.c | 13 ++++++++++++-
packet.h | 3 +++
passt.h | 2 --
tap.c | 18 +++++++++++++++---
tap.h | 3 ++-
vu_common.c | 3 ++-
6 files changed, 34 insertions(+), 8 deletions(-)
diff --git a/packet.c b/packet.c
index 9e0e6555..c3b80924 100644
--- a/packet.c
+++ b/packet.c
@@ -55,6 +55,17 @@ static void packet_check_range(const struct pool *p,
func, line);
}
+/**
+ * pool_full() - Is a packet pool full?
+ * @p: Pointer to packet pool
+ *
+ * Return: true if the pool is full, false if more packets can be added
+ */
+bool pool_full(const struct pool *p)
+{
+ return p->count >= p->size;
+}
+
/**
* packet_add_do() - Add data as packet descriptor to given pool
* @p: Existing pool
@@ -68,7 +79,7 @@ void packet_add_do(struct pool *p, size_t len, const char *start,
{
size_t idx = p->count;
- if (idx >= p->size) {
+ if (pool_full(p)) {
debug("add packet index %zu to pool with size %zu, %s:%i",
idx, p->size, func, line);
return;
diff --git a/packet.h b/packet.h
index b164f77e..8d78841b 100644
--- a/packet.h
+++ b/packet.h
@@ -6,6 +6,8 @@
#ifndef PACKET_H
#define PACKET_H
+#include <stdbool.h>
+
/* Maximum size of a single packet in a pool, including all headers. Sized to
* allow a maximum size IP datagram (65535) plus some extra space or L2 and
* backend specific headers. This is just for sanity checking, so doesn't need
@@ -38,6 +40,7 @@ void packet_add_do(struct pool *p, size_t len, const char *start,
void *packet_get_do(const struct pool *p, const size_t idx,
size_t offset, size_t len, size_t *left,
const char *func, int line);
+bool pool_full(const struct pool *p);
void pool_flush(struct pool *p);
#define packet_add(p, len, start) \
diff --git a/passt.h b/passt.h
index 0dd4efa0..81b2787f 100644
--- a/passt.h
+++ b/passt.h
@@ -70,8 +70,6 @@ static_assert(sizeof(union epoll_ref) <= sizeof(union epoll_data),
#define TAP_BUF_BYTES \
ROUND_DOWN(((ETH_MAX_MTU + sizeof(uint32_t)) * 128), PAGE_SIZE)
-#define TAP_MSGS \
- DIV_ROUND_UP(TAP_BUF_BYTES, ETH_ZLEN - 2 * ETH_ALEN + sizeof(uint32_t))
#define PKT_BUF_BYTES MAX(TAP_BUF_BYTES, 0)
extern char pkt_buf [PKT_BUF_BYTES];
diff --git a/tap.c b/tap.c
index cd32a901..4421ce4d 100644
--- a/tap.c
+++ b/tap.c
@@ -61,6 +61,8 @@
#include "vhost_user.h"
#include "vu_common.h"
+#define TAP_MSGS 256
+
/* IPv4 (plus ARP) and IPv6 message batches from tap/guest to IP handlers */
static PACKET_POOL_NOINIT(pool_tap4, TAP_MSGS, pkt_buf);
static PACKET_POOL_NOINIT(pool_tap6, TAP_MSGS, pkt_buf);
@@ -966,8 +968,10 @@ void tap_handler(struct ctx *c, const struct timespec *now)
* @c: Execution context
* @l2len: Total L2 packet length
* @p: Packet buffer
+ * @now: Current timestamp
*/
-void tap_add_packet(struct ctx *c, ssize_t l2len, char *p)
+void tap_add_packet(struct ctx *c, ssize_t l2len, char *p,
+ const struct timespec *now)
{
const struct ethhdr *eh;
@@ -983,9 +987,17 @@ void tap_add_packet(struct ctx *c, ssize_t l2len, char *p)
switch (ntohs(eh->h_proto)) {
case ETH_P_ARP:
case ETH_P_IP:
+ if (pool_full(pool_tap4)) {
+ tap4_handler(c, pool_tap4, now);
+ pool_flush(pool_tap4);
+ }
packet_add(pool_tap4, l2len, p);
break;
case ETH_P_IPV6:
+ if (pool_full(pool_tap6)) {
+ tap6_handler(c, pool_tap6, now);
+ pool_flush(pool_tap6);
+ }
packet_add(pool_tap6, l2len, p);
break;
default:
@@ -1066,7 +1078,7 @@ static void tap_passt_input(struct ctx *c, const struct timespec *now)
p += sizeof(uint32_t);
n -= sizeof(uint32_t);
- tap_add_packet(c, l2len, p);
+ tap_add_packet(c, l2len, p, now);
p += l2len;
n -= l2len;
@@ -1129,7 +1141,7 @@ static void tap_pasta_input(struct ctx *c, const struct timespec *now)
len > (ssize_t)ETH_MAX_MTU)
continue;
- tap_add_packet(c, len, pkt_buf + n);
+ tap_add_packet(c, len, pkt_buf + n, now);
}
tap_handler(c, now);
diff --git a/tap.h b/tap.h
index dfbd8b9e..a1b18430 100644
--- a/tap.h
+++ b/tap.h
@@ -74,6 +74,7 @@ void tap_sock_update_pool(void *base, size_t size);
void tap_backend_init(struct ctx *c);
void tap_flush_pools(void);
void tap_handler(struct ctx *c, const struct timespec *now);
-void tap_add_packet(struct ctx *c, ssize_t l2len, char *p);
+void tap_add_packet(struct ctx *c, ssize_t l2len, char *p,
+ const struct timespec *now);
#endif /* TAP_H */
diff --git a/vu_common.c b/vu_common.c
index 528b9b08..6f49f873 100644
--- a/vu_common.c
+++ b/vu_common.c
@@ -187,7 +187,8 @@ static void vu_handle_tx(struct vu_dev *vdev, int index,
tap_add_packet(vdev->context,
elem[count].out_sg[0].iov_len - hdrlen,
- (char *)elem[count].out_sg[0].iov_base + hdrlen);
+ (char *)elem[count].out_sg[0].iov_base + hdrlen,
+ now);
count++;
}
tap_handler(vdev->context, now);
--
@@ -187,7 +187,8 @@ static void vu_handle_tx(struct vu_dev *vdev, int index,
tap_add_packet(vdev->context,
elem[count].out_sg[0].iov_len - hdrlen,
- (char *)elem[count].out_sg[0].iov_base + hdrlen);
+ (char *)elem[count].out_sg[0].iov_base + hdrlen,
+ now);
count++;
}
tap_handler(vdev->context, now);
--
2.47.1
next prev parent reply other threads:[~2024-12-20 8:58 UTC|newest]
Thread overview: 15+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-12-20 8:35 [PATCH v2 00/12] Cleanups to packet pool handling and sizing David Gibson
2024-12-20 8:35 ` [PATCH v2 01/12] test focus David Gibson
2024-12-20 8:35 ` [PATCH v2 02/12] hack: stop on fail, but not perf fail David Gibson
2024-12-20 8:35 ` [PATCH v2 03/12] make passt dumpable David Gibson
2024-12-20 8:35 ` [PATCH v2 04/12] packet: Use flexible array member in struct pool David Gibson
2024-12-20 8:35 ` [PATCH v2 05/12] packet: Don't pass start and offset separately too packet_check_range() David Gibson
2024-12-20 8:35 ` [PATCH v2 06/12] packet: Don't hard code maximum packet size to UINT16_MAX David Gibson
2024-12-20 8:35 ` [PATCH v2 07/12] packet: Remove unhelpful packet_get_try() macro David Gibson
2024-12-20 8:35 ` [PATCH v2 08/12] util: Add abort_with_msg() and ASSERT_WITH_MSG() helpers David Gibson
2024-12-20 8:35 ` [PATCH v2 09/12] packet: Distinguish severities of different packet_{add,git}_do() errors David Gibson
2024-12-20 8:35 ` [PATCH v2 10/12] packet: Move packet length checks into packet_check_range() David Gibson
2024-12-20 8:35 ` David Gibson [this message]
2024-12-20 8:35 ` [PATCH v2 12/12] packet: More cautious checks to avoid pointer arithmetic UB David Gibson
2024-12-20 9:00 ` [PATCH v2 00/12] Cleanups to packet pool handling and sizing David Gibson
2024-12-20 10:06 ` Stefano Brivio
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20241220083535.1372523-12-david@gibson.dropbear.id.au \
--to=david@gibson.dropbear.id.au \
--cc=passt-dev@passt.top \
--cc=sbrivio@redhat.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
Code repositories for project(s) associated with this public inbox
https://passt.top/passt
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for IMAP folder(s).