From: David Gibson <david@gibson.dropbear.id.au>
To: passt-dev@passt.top, Stefano Brivio <sbrivio@redhat.com>
Cc: David Gibson <david@gibson.dropbear.id.au>
Subject: [PATCH 3/3] tap: Don't size pool_tap[46] for the maximum number of packets
Date: Fri, 13 Dec 2024 23:01:56 +1100
Message-ID: <20241213120156.4123972-4-david@gibson.dropbear.id.au>
In-Reply-To: <20241213120156.4123972-1-david@gibson.dropbear.id.au>
Currently we attempt to size pool_tap[46] so they have room for the maximum
possible number of packets that could fit in pkt_buf (TAP_MSGS). However,
the calculation isn't quite correct: TAP_MSGS is based on ETH_ZLEN (60) as
the minimum possible L2 frame size. But we don't enforce that L2 frames
are at least ETH_ZLEN when we receive them from the tap backend, and since
we're dealing with virtual interfaces we don't have the physical Ethernet
limitations requiring that length. Indeed, it is possible to generate a
legitimate frame smaller than that (e.g. a zero-payload UDP/IPv4 frame on
the 'pasta' backend is only 42 bytes long).
It's also unclear if this limit is sufficient for vhost-user, which isn't
limited by the size of pkt_buf as the other modes are.
We could attempt to correct the calculation, but that would leave us with
even larger arrays, which in practice rarely accumulate more than a handful
of packets. So, instead, put an arbitrary cap on the number of packets we
can put in a batch, and if we run out of space, process and flush the
batch.
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
---
packet.c | 13 ++++++++++++-
packet.h | 3 +++
passt.h | 2 --
tap.c | 18 +++++++++++++++---
tap.h | 3 ++-
vu_common.c | 3 ++-
6 files changed, 34 insertions(+), 8 deletions(-)
diff --git a/packet.c b/packet.c
index 5bfa7304..b68580cc 100644
--- a/packet.c
+++ b/packet.c
@@ -22,6 +22,17 @@
#include "util.h"
#include "log.h"
+/**
+ * pool_full() - Is a packet pool full?
+ * @p: Pointer to packet pool
+ *
+ * Return: true if the pool is full, false if more packets can be added
+ */
+bool pool_full(const struct pool *p)
+{
+ return p->count >= p->size;
+}
+
/**
* packet_add_do() - Add data as packet descriptor to given pool
* @p: Existing pool
@@ -35,7 +46,7 @@ void packet_add_do(struct pool *p, size_t len, const char *start,
{
size_t idx = p->count;
- if (idx >= p->size) {
+ if (pool_full(p)) {
trace("add packet index %zu to pool with size %zu, %s:%i",
idx, p->size, func, line);
return;
diff --git a/packet.h b/packet.h
index 98eb8812..3618f213 100644
--- a/packet.h
+++ b/packet.h
@@ -6,6 +6,8 @@
#ifndef PACKET_H
#define PACKET_H
+#include <stdbool.h>
+
/**
* struct pool - Generic pool of packets stored in nmemory
* @size: Number of usable descriptors for the pool
@@ -23,6 +25,7 @@ void packet_add_do(struct pool *p, size_t len, const char *start,
void *packet_get_do(const struct pool *p, const size_t idx,
size_t offset, size_t len, size_t *left,
const char *func, int line);
+bool pool_full(const struct pool *p);
void pool_flush(struct pool *p);
#define packet_add(p, len, start) \
diff --git a/passt.h b/passt.h
index 0dd4efa0..81b2787f 100644
--- a/passt.h
+++ b/passt.h
@@ -70,8 +70,6 @@ static_assert(sizeof(union epoll_ref) <= sizeof(union epoll_data),
#define TAP_BUF_BYTES \
ROUND_DOWN(((ETH_MAX_MTU + sizeof(uint32_t)) * 128), PAGE_SIZE)
-#define TAP_MSGS \
- DIV_ROUND_UP(TAP_BUF_BYTES, ETH_ZLEN - 2 * ETH_ALEN + sizeof(uint32_t))
#define PKT_BUF_BYTES MAX(TAP_BUF_BYTES, 0)
extern char pkt_buf [PKT_BUF_BYTES];
diff --git a/tap.c b/tap.c
index 68231f09..42370a26 100644
--- a/tap.c
+++ b/tap.c
@@ -61,6 +61,8 @@
#include "vhost_user.h"
#include "vu_common.h"
+#define TAP_MSGS 256
+
/* IPv4 (plus ARP) and IPv6 message batches from tap/guest to IP handlers */
static PACKET_POOL_P(pool_tap4, TAP_MSGS);
static PACKET_POOL_P(pool_tap6, TAP_MSGS);
@@ -966,8 +968,10 @@ void tap_handler(struct ctx *c, const struct timespec *now)
* @c: Execution context
* @l2len: Total L2 packet length
* @p: Packet buffer
+ * @now: Current timestamp
*/
-void tap_add_packet(struct ctx *c, ssize_t l2len, char *p)
+void tap_add_packet(struct ctx *c, ssize_t l2len, char *p,
+ const struct timespec *now)
{
const struct ethhdr *eh;
@@ -983,9 +987,17 @@ void tap_add_packet(struct ctx *c, ssize_t l2len, char *p)
switch (ntohs(eh->h_proto)) {
case ETH_P_ARP:
case ETH_P_IP:
+ if (pool_full(pool_tap4)) {
+ tap4_handler(c, pool_tap4, now);
+ pool_flush(pool_tap4);
+ }
packet_add(pool_tap4, l2len, p);
break;
case ETH_P_IPV6:
+ if (pool_full(pool_tap6)) {
+ tap6_handler(c, pool_tap6, now);
+ pool_flush(pool_tap6);
+ }
packet_add(pool_tap6, l2len, p);
break;
default:
@@ -1066,7 +1078,7 @@ static void tap_passt_input(struct ctx *c, const struct timespec *now)
p += sizeof(uint32_t);
n -= sizeof(uint32_t);
- tap_add_packet(c, l2len, p);
+ tap_add_packet(c, l2len, p, now);
p += l2len;
n -= l2len;
@@ -1129,7 +1141,7 @@ static void tap_pasta_input(struct ctx *c, const struct timespec *now)
len > (ssize_t)ETH_MAX_MTU)
continue;
- tap_add_packet(c, len, pkt_buf + n);
+ tap_add_packet(c, len, pkt_buf + n, now);
}
tap_handler(c, now);
diff --git a/tap.h b/tap.h
index 9731f70c..c9acf860 100644
--- a/tap.h
+++ b/tap.h
@@ -73,6 +73,7 @@ void tap_sock_reset(struct ctx *c);
void tap_backend_init(struct ctx *c);
void tap_flush_pools(void);
void tap_handler(struct ctx *c, const struct timespec *now);
-void tap_add_packet(struct ctx *c, ssize_t l2len, char *p);
+void tap_add_packet(struct ctx *c, ssize_t l2len, char *p,
+ const struct timespec *now);
#endif /* TAP_H */
diff --git a/vu_common.c b/vu_common.c
index bb794193..38511230 100644
--- a/vu_common.c
+++ b/vu_common.c
@@ -157,7 +157,8 @@ static void vu_handle_tx(struct vu_dev *vdev, int index,
tap_add_packet(vdev->context,
elem[count].out_sg[0].iov_len - hdrlen,
- (char *)elem[count].out_sg[0].iov_base + hdrlen);
+ (char *)elem[count].out_sg[0].iov_base + hdrlen,
+ now);
count++;
}
tap_handler(vdev->context, now);
--
2.47.1
next prev parent reply other threads:[~2024-12-13 12:02 UTC|newest]
Thread overview: 12+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-12-13 12:01 [PATCH 0/3] Cleanups to packet pool handling and sizing David Gibson
2024-12-13 12:01 ` [PATCH 1/3] packet: Use flexible array member in struct pool David Gibson
2024-12-13 12:01 ` [PATCH 2/3] packet: Don't have struct pool specify its buffer David Gibson
2024-12-19 9:00 ` Stefano Brivio
2024-12-20 0:59 ` David Gibson
2024-12-20 9:51 ` Stefano Brivio
2024-12-21 6:59 ` David Gibson
2024-12-13 12:01 ` David Gibson [this message]
2024-12-19 9:00 ` [PATCH 3/3] tap: Don't size pool_tap[46] for the maximum number of packets Stefano Brivio
2024-12-20 1:13 ` David Gibson
2024-12-20 9:51 ` Stefano Brivio
2024-12-21 7:00 ` David Gibson
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20241213120156.4123972-4-david@gibson.dropbear.id.au \
--to=david@gibson.dropbear.id.au \
--cc=passt-dev@passt.top \
--cc=sbrivio@redhat.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
Code repositories for project(s) associated with this public inbox
https://passt.top/passt
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for IMAP folder(s).