* [PATCH 1/9] conf: Use the same optstring for passt and pasta modes
2025-03-11 6:03 [PATCH 0/9] Improve handling of MTU limits David Gibson
@ 2025-03-11 6:03 ` David Gibson
2025-03-11 6:03 ` [PATCH 2/9] conf: Move mode detection into helper function David Gibson
` (7 subsequent siblings)
8 siblings, 0 replies; 14+ messages in thread
From: David Gibson @ 2025-03-11 6:03 UTC (permalink / raw)
To: passt-dev, Stefano Brivio; +Cc: David Gibson
Currently we rely on detecting our mode first and use different sets of
(single character) options for each. This means that if you use an option
valid in only one mode in another you'll get the generic usage() message.
We can give more helpful errors with little extra effort by combining all
the options into a single value of the option string and giving bespoke
messages if an option for the wrong mode is used; in fact we already did
this for some single mode options like '-1'.
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
---
conf.c | 20 ++++++++++++++------
1 file changed, 14 insertions(+), 6 deletions(-)
diff --git a/conf.c b/conf.c
index 065e7201..7f20bc84 100644
--- a/conf.c
+++ b/conf.c
@@ -1388,6 +1388,7 @@ void conf(struct ctx *c, int argc, char **argv)
{"repair-path", required_argument, NULL, 28 },
{ 0 },
};
+ const char *optstring = "+dqfel:hs:F:I:p:P:m:a:n:M:g:i:o:D:S:H:461t:u:T:U:";
const char *logname = (c->mode == MODE_PASTA) ? "pasta" : "passt";
char userns[PATH_MAX] = { 0 }, netns[PATH_MAX] = { 0 };
bool copy_addrs_opt = false, copy_routes_opt = false;
@@ -1397,7 +1398,6 @@ void conf(struct ctx *c, int argc, char **argv)
struct fqdn *dnss = c->dns_search;
unsigned int ifi4 = 0, ifi6 = 0;
const char *logfile = NULL;
- const char *optstring;
size_t logsize = 0;
char *runas = NULL;
long fd_tap_opt;
@@ -1408,9 +1408,6 @@ void conf(struct ctx *c, int argc, char **argv)
if (c->mode == MODE_PASTA) {
c->no_dhcp_dns = c->no_dhcp_dns_search = 1;
fwd_default = FWD_AUTO;
- optstring = "+dqfel:hF:I:p:P:m:a:n:M:g:i:o:D:S:H:46t:u:T:U:";
- } else {
- optstring = "+dqfel:hs:F:p:P:m:a:n:M:g:i:o:D:S:H:461t:u:";
}
c->mtu = ROUND_DOWN(ETH_MAX_MTU - ETH_HLEN, sizeof(uint32_t));
@@ -1614,6 +1611,9 @@ void conf(struct ctx *c, int argc, char **argv)
c->foreground = 1;
break;
case 's':
+ if (c->mode == MODE_PASTA)
+ die("-s is for passt / vhost-user mode only");
+
ret = snprintf(c->sock_path, sizeof(c->sock_path), "%s",
optarg);
if (ret <= 0 || ret >= (int)sizeof(c->sock_path))
@@ -1634,6 +1634,9 @@ void conf(struct ctx *c, int argc, char **argv)
*c->sock_path = 0;
break;
case 'I':
+ if (c->mode != MODE_PASTA)
+ die("-I is for pasta mode only");
+
ret = snprintf(c->pasta_ifn, IFNAMSIZ, "%s",
optarg);
if (ret <= 0 || ret >= IFNAMSIZ)
@@ -1790,11 +1793,16 @@ void conf(struct ctx *c, int argc, char **argv)
break;
case 't':
case 'u':
- case 'T':
- case 'U':
case 'D':
/* Handle these later, once addresses are configured */
break;
+ case 'T':
+ case 'U':
+ if (c->mode != MODE_PASTA)
+ die("-%c is for pasta mode only", name);
+
+ /* Handle properly later, once addresses are configured */
+ break;
case 'h':
usage(argv[0], stdout, EXIT_SUCCESS);
break;
--
@@ -1388,6 +1388,7 @@ void conf(struct ctx *c, int argc, char **argv)
{"repair-path", required_argument, NULL, 28 },
{ 0 },
};
+ const char *optstring = "+dqfel:hs:F:I:p:P:m:a:n:M:g:i:o:D:S:H:461t:u:T:U:";
const char *logname = (c->mode == MODE_PASTA) ? "pasta" : "passt";
char userns[PATH_MAX] = { 0 }, netns[PATH_MAX] = { 0 };
bool copy_addrs_opt = false, copy_routes_opt = false;
@@ -1397,7 +1398,6 @@ void conf(struct ctx *c, int argc, char **argv)
struct fqdn *dnss = c->dns_search;
unsigned int ifi4 = 0, ifi6 = 0;
const char *logfile = NULL;
- const char *optstring;
size_t logsize = 0;
char *runas = NULL;
long fd_tap_opt;
@@ -1408,9 +1408,6 @@ void conf(struct ctx *c, int argc, char **argv)
if (c->mode == MODE_PASTA) {
c->no_dhcp_dns = c->no_dhcp_dns_search = 1;
fwd_default = FWD_AUTO;
- optstring = "+dqfel:hF:I:p:P:m:a:n:M:g:i:o:D:S:H:46t:u:T:U:";
- } else {
- optstring = "+dqfel:hs:F:p:P:m:a:n:M:g:i:o:D:S:H:461t:u:";
}
c->mtu = ROUND_DOWN(ETH_MAX_MTU - ETH_HLEN, sizeof(uint32_t));
@@ -1614,6 +1611,9 @@ void conf(struct ctx *c, int argc, char **argv)
c->foreground = 1;
break;
case 's':
+ if (c->mode == MODE_PASTA)
+ die("-s is for passt / vhost-user mode only");
+
ret = snprintf(c->sock_path, sizeof(c->sock_path), "%s",
optarg);
if (ret <= 0 || ret >= (int)sizeof(c->sock_path))
@@ -1634,6 +1634,9 @@ void conf(struct ctx *c, int argc, char **argv)
*c->sock_path = 0;
break;
case 'I':
+ if (c->mode != MODE_PASTA)
+ die("-I is for pasta mode only");
+
ret = snprintf(c->pasta_ifn, IFNAMSIZ, "%s",
optarg);
if (ret <= 0 || ret >= IFNAMSIZ)
@@ -1790,11 +1793,16 @@ void conf(struct ctx *c, int argc, char **argv)
break;
case 't':
case 'u':
- case 'T':
- case 'U':
case 'D':
/* Handle these later, once addresses are configured */
break;
+ case 'T':
+ case 'U':
+ if (c->mode != MODE_PASTA)
+ die("-%c is for pasta mode only", name);
+
+ /* Handle properly later, once addresses are configured */
+ break;
case 'h':
usage(argv[0], stdout, EXIT_SUCCESS);
break;
--
2.48.1
^ permalink raw reply related [flat|nested] 14+ messages in thread
* [PATCH 2/9] conf: Move mode detection into helper function
2025-03-11 6:03 [PATCH 0/9] Improve handling of MTU limits David Gibson
2025-03-11 6:03 ` [PATCH 1/9] conf: Use the same optstring for passt and pasta modes David Gibson
@ 2025-03-11 6:03 ` David Gibson
2025-03-11 6:03 ` [PATCH 3/9] conf: Detect vhost-user mode earlier David Gibson
` (6 subsequent siblings)
8 siblings, 0 replies; 14+ messages in thread
From: David Gibson @ 2025-03-11 6:03 UTC (permalink / raw)
To: passt-dev, Stefano Brivio; +Cc: David Gibson
One of the first things we need to do is determine if we're in passt mode
or pasta mode. Currently this is open-coded in main(), by examining
argv[0]. We want to complexify this a bit in future to cover vhost-user
mode as well. Prepare for this, by moving the mode detection into a new
conf_mode() function.
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
---
conf.c | 26 ++++++++++++++++++++++++++
conf.h | 1 +
passt.c | 14 ++------------
3 files changed, 29 insertions(+), 12 deletions(-)
diff --git a/conf.c b/conf.c
index 7f20bc84..2022ea1d 100644
--- a/conf.c
+++ b/conf.c
@@ -991,6 +991,32 @@ pasta_opts:
_exit(status);
}
+/**
+ * conf_mode() - Determine passt/pasta's operating mode from command line
+ * @argc: Argument count
+ * @argv: Command line arguments
+ *
+ * Return: mode to operate in, PASTA or PASST
+ */
+/* cppcheck-suppress constParameter */
+enum passt_modes conf_mode(int argc, char *argv[])
+{
+ char argv0[PATH_MAX], *basearg0;
+
+ if (argc < 1)
+ die("Cannot determine argv[0]");
+
+ strncpy(argv0, argv[0], PATH_MAX - 1);
+ basearg0 = basename(argv0);
+ if (strstr(basearg0, "pasta"))
+ return MODE_PASTA;
+
+ if (strstr(basearg0, "passt"))
+ return MODE_PASST;
+
+ die("Cannot determine mode, invoke as \"passt\" or \"pasta\"");
+}
+
/**
* conf_print() - Print fundamental configuration parameters
* @c: Execution context
diff --git a/conf.h b/conf.h
index 9d2143db..b45ad746 100644
--- a/conf.h
+++ b/conf.h
@@ -6,6 +6,7 @@
#ifndef CONF_H
#define CONF_H
+enum passt_modes conf_mode(int argc, char *argv[]);
void conf(struct ctx *c, int argc, char **argv);
#endif /* CONF_H */
diff --git a/passt.c b/passt.c
index 868842b0..0bd2a292 100644
--- a/passt.c
+++ b/passt.c
@@ -191,7 +191,6 @@ int main(int argc, char **argv)
{
struct epoll_event events[EPOLL_EVENTS];
int nfds, i, devnull_fd = -1;
- char argv0[PATH_MAX], *name;
struct ctx c = { 0 };
struct rlimit limit;
struct timespec now;
@@ -213,21 +212,12 @@ int main(int argc, char **argv)
sigaction(SIGTERM, &sa, NULL);
sigaction(SIGQUIT, &sa, NULL);
- if (argc < 1)
- _exit(EXIT_FAILURE);
+ c.mode = conf_mode(argc, argv);
- strncpy(argv0, argv[0], PATH_MAX - 1);
- name = basename(argv0);
- if (strstr(name, "pasta")) {
+ if (c.mode == MODE_PASTA) {
sa.sa_handler = pasta_child_handler;
if (sigaction(SIGCHLD, &sa, NULL))
die_perror("Couldn't install signal handlers");
-
- c.mode = MODE_PASTA;
- } else if (strstr(name, "passt")) {
- c.mode = MODE_PASST;
- } else {
- _exit(EXIT_FAILURE);
}
if (signal(SIGPIPE, SIG_IGN) == SIG_ERR)
--
@@ -191,7 +191,6 @@ int main(int argc, char **argv)
{
struct epoll_event events[EPOLL_EVENTS];
int nfds, i, devnull_fd = -1;
- char argv0[PATH_MAX], *name;
struct ctx c = { 0 };
struct rlimit limit;
struct timespec now;
@@ -213,21 +212,12 @@ int main(int argc, char **argv)
sigaction(SIGTERM, &sa, NULL);
sigaction(SIGQUIT, &sa, NULL);
- if (argc < 1)
- _exit(EXIT_FAILURE);
+ c.mode = conf_mode(argc, argv);
- strncpy(argv0, argv[0], PATH_MAX - 1);
- name = basename(argv0);
- if (strstr(name, "pasta")) {
+ if (c.mode == MODE_PASTA) {
sa.sa_handler = pasta_child_handler;
if (sigaction(SIGCHLD, &sa, NULL))
die_perror("Couldn't install signal handlers");
-
- c.mode = MODE_PASTA;
- } else if (strstr(name, "passt")) {
- c.mode = MODE_PASST;
- } else {
- _exit(EXIT_FAILURE);
}
if (signal(SIGPIPE, SIG_IGN) == SIG_ERR)
--
2.48.1
^ permalink raw reply related [flat|nested] 14+ messages in thread
* [PATCH 3/9] conf: Detect vhost-user mode earlier
2025-03-11 6:03 [PATCH 0/9] Improve handling of MTU limits David Gibson
2025-03-11 6:03 ` [PATCH 1/9] conf: Use the same optstring for passt and pasta modes David Gibson
2025-03-11 6:03 ` [PATCH 2/9] conf: Move mode detection into helper function David Gibson
@ 2025-03-11 6:03 ` David Gibson
2025-03-11 22:45 ` Stefano Brivio
2025-03-11 6:03 ` [PATCH 4/9] packet: Give explicit name to maximum packet size David Gibson
` (5 subsequent siblings)
8 siblings, 1 reply; 14+ messages in thread
From: David Gibson @ 2025-03-11 6:03 UTC (permalink / raw)
To: passt-dev, Stefano Brivio; +Cc: David Gibson
We detect our operating mode in conf_mode(), unless we're using vhost-user
mode, in which case we change it later when we parse the --vhost-user
option. That means we need to delay parsing the --repair-path option (for
vhost-user only) until still later.
However, there are many other places in the main option parsing loop which
also rely on mode. We get away with those, because they happen to be able
to treat passt and vhost-user modes identically. This is potentially
confusing, though. So, move setting of MODE_VU into conf_mode() so
c->mode always has its final value from that point onwards.
To match, we move the parsing of --repair-path back into the main option
parsing loop.
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
---
conf.c | 43 ++++++++++++++++++++++++++-----------------
1 file changed, 26 insertions(+), 17 deletions(-)
diff --git a/conf.c b/conf.c
index 2022ea1d..b58e2a6e 100644
--- a/conf.c
+++ b/conf.c
@@ -998,10 +998,23 @@ pasta_opts:
*
* Return: mode to operate in, PASTA or PASST
*/
-/* cppcheck-suppress constParameter */
enum passt_modes conf_mode(int argc, char *argv[])
{
+ int vhost_user = 0;
+ const struct option optvu[] = {
+ {"vhost-user", no_argument, &vhost_user, 1 },
+ { 0 },
+ };
char argv0[PATH_MAX], *basearg0;
+ int name;
+
+ optind = 0;
+ do {
+ name = getopt_long(argc, argv, "-:", optvu, NULL);
+ } while (name != -1);
+
+ if (vhost_user)
+ return MODE_VU;
if (argc < 1)
die("Cannot determine argv[0]");
@@ -1604,9 +1617,8 @@ void conf(struct ctx *c, int argc, char **argv)
die("Invalid host nameserver address: %s", optarg);
case 25:
- if (c->mode == MODE_PASTA)
- die("--vhost-user is for passt mode only");
- c->mode = MODE_VU;
+ /* Already handled in conf_mode() */
+ ASSERT(c->mode == MODE_VU);
break;
case 26:
vu_print_capabilities();
@@ -1617,7 +1629,14 @@ void conf(struct ctx *c, int argc, char **argv)
die("Invalid FQDN: %s", optarg);
break;
case 28:
- /* Handle this once we checked --vhost-user */
+ if (c->mode != MODE_VU && strcmp(optarg, "none"))
+ die("--repair-path is for vhost-user mode only");
+
+ if (snprintf_check(c->repair_path,
+ sizeof(c->repair_path), "%s",
+ optarg))
+ die("Invalid passt-repair path: %s", optarg);
+
break;
case 'd':
c->debug = 1;
@@ -1917,8 +1936,8 @@ void conf(struct ctx *c, int argc, char **argv)
if (c->ifi4 && IN4_IS_ADDR_UNSPECIFIED(&c->ip4.guest_gw))
c->no_dhcp = 1;
- /* Inbound port options, DNS, and --repair-path can be parsed now, after
- * IPv4/IPv6 settings and --vhost-user.
+ /* Inbound port options and DNS can be parsed now, after IPv4/IPv6
+ * settings
*/
fwd_probe_ephemeral();
udp_portmap_clear();
@@ -1964,16 +1983,6 @@ void conf(struct ctx *c, int argc, char **argv)
}
die("Cannot use DNS address %s", optarg);
- } else if (name == 28) {
- if (c->mode != MODE_VU && strcmp(optarg, "none"))
- die("--repair-path is for vhost-user mode only");
-
- if (snprintf_check(c->repair_path,
- sizeof(c->repair_path), "%s",
- optarg))
- die("Invalid passt-repair path: %s", optarg);
-
- break;
}
} while (name != -1);
--
@@ -998,10 +998,23 @@ pasta_opts:
*
* Return: mode to operate in, PASTA or PASST
*/
-/* cppcheck-suppress constParameter */
enum passt_modes conf_mode(int argc, char *argv[])
{
+ int vhost_user = 0;
+ const struct option optvu[] = {
+ {"vhost-user", no_argument, &vhost_user, 1 },
+ { 0 },
+ };
char argv0[PATH_MAX], *basearg0;
+ int name;
+
+ optind = 0;
+ do {
+ name = getopt_long(argc, argv, "-:", optvu, NULL);
+ } while (name != -1);
+
+ if (vhost_user)
+ return MODE_VU;
if (argc < 1)
die("Cannot determine argv[0]");
@@ -1604,9 +1617,8 @@ void conf(struct ctx *c, int argc, char **argv)
die("Invalid host nameserver address: %s", optarg);
case 25:
- if (c->mode == MODE_PASTA)
- die("--vhost-user is for passt mode only");
- c->mode = MODE_VU;
+ /* Already handled in conf_mode() */
+ ASSERT(c->mode == MODE_VU);
break;
case 26:
vu_print_capabilities();
@@ -1617,7 +1629,14 @@ void conf(struct ctx *c, int argc, char **argv)
die("Invalid FQDN: %s", optarg);
break;
case 28:
- /* Handle this once we checked --vhost-user */
+ if (c->mode != MODE_VU && strcmp(optarg, "none"))
+ die("--repair-path is for vhost-user mode only");
+
+ if (snprintf_check(c->repair_path,
+ sizeof(c->repair_path), "%s",
+ optarg))
+ die("Invalid passt-repair path: %s", optarg);
+
break;
case 'd':
c->debug = 1;
@@ -1917,8 +1936,8 @@ void conf(struct ctx *c, int argc, char **argv)
if (c->ifi4 && IN4_IS_ADDR_UNSPECIFIED(&c->ip4.guest_gw))
c->no_dhcp = 1;
- /* Inbound port options, DNS, and --repair-path can be parsed now, after
- * IPv4/IPv6 settings and --vhost-user.
+ /* Inbound port options and DNS can be parsed now, after IPv4/IPv6
+ * settings
*/
fwd_probe_ephemeral();
udp_portmap_clear();
@@ -1964,16 +1983,6 @@ void conf(struct ctx *c, int argc, char **argv)
}
die("Cannot use DNS address %s", optarg);
- } else if (name == 28) {
- if (c->mode != MODE_VU && strcmp(optarg, "none"))
- die("--repair-path is for vhost-user mode only");
-
- if (snprintf_check(c->repair_path,
- sizeof(c->repair_path), "%s",
- optarg))
- die("Invalid passt-repair path: %s", optarg);
-
- break;
}
} while (name != -1);
--
2.48.1
^ permalink raw reply related [flat|nested] 14+ messages in thread
* Re: [PATCH 3/9] conf: Detect vhost-user mode earlier
2025-03-11 6:03 ` [PATCH 3/9] conf: Detect vhost-user mode earlier David Gibson
@ 2025-03-11 22:45 ` Stefano Brivio
2025-03-12 0:48 ` David Gibson
0 siblings, 1 reply; 14+ messages in thread
From: Stefano Brivio @ 2025-03-11 22:45 UTC (permalink / raw)
To: David Gibson; +Cc: passt-dev
On Tue, 11 Mar 2025 17:03:12 +1100
David Gibson <david@gibson.dropbear.id.au> wrote:
> We detect our operating mode in conf_mode(), unless we're using vhost-user
> mode, in which case we change it later when we parse the --vhost-user
> option. That means we need to delay parsing the --repair-path option (for
> vhost-user only) until still later.
>
> However, there are many other places in the main option parsing loop which
> also rely on mode. We get away with those, because they happen to be able
> to treat passt and vhost-user modes identically. This is potentially
> confusing, though. So, move setting of MODE_VU into conf_mode() so
> c->mode always has its final value from that point onwards.
>
> To match, we move the parsing of --repair-path back into the main option
> parsing loop.
>
> Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
> ---
> conf.c | 43 ++++++++++++++++++++++++++-----------------
> 1 file changed, 26 insertions(+), 17 deletions(-)
>
> diff --git a/conf.c b/conf.c
> index 2022ea1d..b58e2a6e 100644
> --- a/conf.c
> +++ b/conf.c
> @@ -998,10 +998,23 @@ pasta_opts:
> *
> * Return: mode to operate in, PASTA or PASST
> */
> -/* cppcheck-suppress constParameter */
> enum passt_modes conf_mode(int argc, char *argv[])
> {
> + int vhost_user = 0;
> + const struct option optvu[] = {
> + {"vhost-user", no_argument, &vhost_user, 1 },
> + { 0 },
> + };
> char argv0[PATH_MAX], *basearg0;
> + int name;
> +
> + optind = 0;
> + do {
> + name = getopt_long(argc, argv, "-:", optvu, NULL);
> + } while (name != -1);
> +
> + if (vhost_user)
> + return MODE_VU;
>
> if (argc < 1)
> die("Cannot determine argv[0]");
> @@ -1604,9 +1617,8 @@ void conf(struct ctx *c, int argc, char **argv)
>
> die("Invalid host nameserver address: %s", optarg);
> case 25:
> - if (c->mode == MODE_PASTA)
> - die("--vhost-user is for passt mode only");
This check should now be moved to conf_mode() instead of being dropped,
otherwise you can do:
$ ./pasta -f --vhost-user
and at this point, the mode is MODE_VU, so it's all fine, but I don't
think it's intended (...or is it?).
> - c->mode = MODE_VU;
> + /* Already handled in conf_mode() */
> + ASSERT(c->mode == MODE_VU);
> break;
> case 26:
> vu_print_capabilities();
Pre-existing, but now we can fix this: case 26 (--print-capabilities)
should only be accepted if (c->mode == MODE_VU).
It can also be done in another patch I would say, if you don't want to
re-spin this.
--
Stefano
^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [PATCH 3/9] conf: Detect vhost-user mode earlier
2025-03-11 22:45 ` Stefano Brivio
@ 2025-03-12 0:48 ` David Gibson
0 siblings, 0 replies; 14+ messages in thread
From: David Gibson @ 2025-03-12 0:48 UTC (permalink / raw)
To: Stefano Brivio; +Cc: passt-dev
[-- Attachment #1: Type: text/plain, Size: 3746 bytes --]
On Tue, Mar 11, 2025 at 11:45:03PM +0100, Stefano Brivio wrote:
> On Tue, 11 Mar 2025 17:03:12 +1100
> David Gibson <david@gibson.dropbear.id.au> wrote:
>
> > We detect our operating mode in conf_mode(), unless we're using vhost-user
> > mode, in which case we change it later when we parse the --vhost-user
> > option. That means we need to delay parsing the --repair-path option (for
> > vhost-user only) until still later.
> >
> > However, there are many other places in the main option parsing loop which
> > also rely on mode. We get away with those, because they happen to be able
> > to treat passt and vhost-user modes identically. This is potentially
> > confusing, though. So, move setting of MODE_VU into conf_mode() so
> > c->mode always has its final value from that point onwards.
> >
> > To match, we move the parsing of --repair-path back into the main option
> > parsing loop.
> >
> > Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
> > ---
> > conf.c | 43 ++++++++++++++++++++++++++-----------------
> > 1 file changed, 26 insertions(+), 17 deletions(-)
> >
> > diff --git a/conf.c b/conf.c
> > index 2022ea1d..b58e2a6e 100644
> > --- a/conf.c
> > +++ b/conf.c
> > @@ -998,10 +998,23 @@ pasta_opts:
> > *
> > * Return: mode to operate in, PASTA or PASST
> > */
> > -/* cppcheck-suppress constParameter */
> > enum passt_modes conf_mode(int argc, char *argv[])
> > {
> > + int vhost_user = 0;
> > + const struct option optvu[] = {
> > + {"vhost-user", no_argument, &vhost_user, 1 },
> > + { 0 },
> > + };
> > char argv0[PATH_MAX], *basearg0;
> > + int name;
> > +
> > + optind = 0;
> > + do {
> > + name = getopt_long(argc, argv, "-:", optvu, NULL);
> > + } while (name != -1);
> > +
> > + if (vhost_user)
> > + return MODE_VU;
> >
> > if (argc < 1)
> > die("Cannot determine argv[0]");
> > @@ -1604,9 +1617,8 @@ void conf(struct ctx *c, int argc, char **argv)
> >
> > die("Invalid host nameserver address: %s", optarg);
> > case 25:
> > - if (c->mode == MODE_PASTA)
> > - die("--vhost-user is for passt mode only");
>
> This check should now be moved to conf_mode() instead of being dropped,
> otherwise you can do:
>
> $ ./pasta -f --vhost-user
>
> and at this point, the mode is MODE_VU, so it's all fine, but I don't
> think it's intended (...or is it?).
It's more or less intended. To me it seemed simpler to treat
"vhost-user mode" as co-equal with "/dev/net/tun mode" (pasta) or
"qemu -net stream mode" (passt), rather than having vu be sort of a
sub-mode of passt. It's true that vu mode has slightly more in common
with passt mode than pasta at the moment, but I don't see that as
really inherent.
I also saw this as a precursor to a "--mode whatever" option which
would override the mode regardless of argv[0], in case there are
circumstances where manipulating argv[0] is inconvenient.
But if you'd really prefer I can reinstate the check.
> > - c->mode = MODE_VU;
> > + /* Already handled in conf_mode() */
> > + ASSERT(c->mode == MODE_VU);
> > break;
> > case 26:
> > vu_print_capabilities();
>
> Pre-existing, but now we can fix this: case 26 (--print-capabilities)
> should only be accepted if (c->mode == MODE_VU).
I was unsure about this, because I wasn't certain if --vhost-user was
passed when we were invoked just to probe capabilities.
> It can also be done in another patch I would say, if you don't want to
> re-spin this.
>
--
David Gibson (he or they) | I'll have my music baroque, and my code
david AT gibson.dropbear.id.au | minimalist, thank you, not the other way
| around.
http://www.ozlabs.org/~dgibson
[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]
^ permalink raw reply [flat|nested] 14+ messages in thread
* [PATCH 4/9] packet: Give explicit name to maximum packet size
2025-03-11 6:03 [PATCH 0/9] Improve handling of MTU limits David Gibson
` (2 preceding siblings ...)
2025-03-11 6:03 ` [PATCH 3/9] conf: Detect vhost-user mode earlier David Gibson
@ 2025-03-11 6:03 ` David Gibson
2025-03-11 6:03 ` [PATCH 5/9] packet: Remove redundant TAP_BUF_BYTES define David Gibson
` (4 subsequent siblings)
8 siblings, 0 replies; 14+ messages in thread
From: David Gibson @ 2025-03-11 6:03 UTC (permalink / raw)
To: passt-dev, Stefano Brivio; +Cc: David Gibson
We verify that every packet we store in a pool (and every partial packet
we retreive from it) has a length no longer than UINT16_MAX. This
originated in the older packet pool implementation which stored packet
lengths in a uint16_t. Now, that packets are represented by a struct
iovec with its size_t length, this check serves only as a sanity / security
check that we don't have some wildly out of range length due to a bug
elsewhere.
We have may reasons to (slightly) increase this limit in future, so in
preparation, give this quantity an explicit name - PACKET_MAX_LEN.
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
---
packet.c | 4 ++--
packet.h | 3 +++
2 files changed, 5 insertions(+), 2 deletions(-)
diff --git a/packet.c b/packet.c
index 0330b548..bcac0375 100644
--- a/packet.c
+++ b/packet.c
@@ -83,7 +83,7 @@ void packet_add_do(struct pool *p, size_t len, const char *start,
if (packet_check_range(p, start, len, func, line))
return;
- if (len > UINT16_MAX) {
+ if (len > PACKET_MAX_LEN) {
trace("add packet length %zu, %s:%i", len, func, line);
return;
}
@@ -119,7 +119,7 @@ void *packet_get_do(const struct pool *p, size_t idx, size_t offset,
return NULL;
}
- if (len > UINT16_MAX) {
+ if (len > PACKET_MAX_LEN) {
if (func) {
trace("packet data length %zu, %s:%i",
len, func, line);
diff --git a/packet.h b/packet.h
index bdc07fef..d099f026 100644
--- a/packet.h
+++ b/packet.h
@@ -6,6 +6,9 @@
#ifndef PACKET_H
#define PACKET_H
+/* Maximum size of a single packet stored in pool, including headers */
+#define PACKET_MAX_LEN UINT16_MAX
+
/**
* struct pool - Generic pool of packets stored in a buffer
* @buf: Buffer storing packet descriptors,
--
@@ -6,6 +6,9 @@
#ifndef PACKET_H
#define PACKET_H
+/* Maximum size of a single packet stored in pool, including headers */
+#define PACKET_MAX_LEN UINT16_MAX
+
/**
* struct pool - Generic pool of packets stored in a buffer
* @buf: Buffer storing packet descriptors,
--
2.48.1
^ permalink raw reply related [flat|nested] 14+ messages in thread
* [PATCH 5/9] packet: Remove redundant TAP_BUF_BYTES define
2025-03-11 6:03 [PATCH 0/9] Improve handling of MTU limits David Gibson
` (3 preceding siblings ...)
2025-03-11 6:03 ` [PATCH 4/9] packet: Give explicit name to maximum packet size David Gibson
@ 2025-03-11 6:03 ` David Gibson
2025-03-11 6:03 ` [PATCH 6/9] tap: Use explicit defines for maximum length of L2 frame David Gibson
` (3 subsequent siblings)
8 siblings, 0 replies; 14+ messages in thread
From: David Gibson @ 2025-03-11 6:03 UTC (permalink / raw)
To: passt-dev, Stefano Brivio; +Cc: David Gibson
Currently we define both TAP_BUF_BYTES and PKT_BUF_BYTES as essentially
the same thing. They'll be different only if TAP_BUF_BYTES is negative,
which makes no sense. So, remove TAP_BUF_BYTES and just use PKT_BUF_BYTES.
In addition, most places we use this to just mean the size of the main
packet buffer (pkt_buf) for which we can just directly use sizeof.
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
---
passt.c | 2 +-
passt.h | 5 ++---
tap.c | 4 ++--
3 files changed, 5 insertions(+), 6 deletions(-)
diff --git a/passt.c b/passt.c
index 0bd2a292..cd067728 100644
--- a/passt.c
+++ b/passt.c
@@ -223,7 +223,7 @@ int main(int argc, char **argv)
if (signal(SIGPIPE, SIG_IGN) == SIG_ERR)
die_perror("Couldn't set disposition for SIGPIPE");
- madvise(pkt_buf, TAP_BUF_BYTES, MADV_HUGEPAGE);
+ madvise(pkt_buf, sizeof(pkt_buf), MADV_HUGEPAGE);
c.epollfd = epoll_create1(EPOLL_CLOEXEC);
if (c.epollfd == -1)
diff --git a/passt.h b/passt.h
index 28d13892..6b248051 100644
--- a/passt.h
+++ b/passt.h
@@ -69,12 +69,11 @@ union epoll_ref {
static_assert(sizeof(union epoll_ref) <= sizeof(union epoll_data),
"epoll_ref must have same size as epoll_data");
-#define TAP_BUF_BYTES \
+#define PKT_BUF_BYTES \
ROUND_DOWN(((ETH_MAX_MTU + sizeof(uint32_t)) * 128), PAGE_SIZE)
#define TAP_MSGS \
- DIV_ROUND_UP(TAP_BUF_BYTES, ETH_ZLEN - 2 * ETH_ALEN + sizeof(uint32_t))
+ DIV_ROUND_UP(PKT_BUF_BYTES, ETH_ZLEN - 2 * ETH_ALEN + sizeof(uint32_t))
-#define PKT_BUF_BYTES MAX(TAP_BUF_BYTES, 0)
extern char pkt_buf [PKT_BUF_BYTES];
extern char *epoll_type_str[];
diff --git a/tap.c b/tap.c
index 4541f51d..fb306e75 100644
--- a/tap.c
+++ b/tap.c
@@ -1080,7 +1080,7 @@ static void tap_passt_input(struct ctx *c, const struct timespec *now)
do {
n = recv(c->fd_tap, pkt_buf + partial_len,
- TAP_BUF_BYTES - partial_len, MSG_DONTWAIT);
+ sizeof(pkt_buf) - partial_len, MSG_DONTWAIT);
} while ((n < 0) && errno == EINTR);
if (n < 0) {
@@ -1151,7 +1151,7 @@ static void tap_pasta_input(struct ctx *c, const struct timespec *now)
tap_flush_pools();
- for (n = 0; n <= (ssize_t)(TAP_BUF_BYTES - ETH_MAX_MTU); n += len) {
+ for (n = 0; n <= (ssize_t)(sizeof(pkt_buf) - ETH_MAX_MTU); n += len) {
len = read(c->fd_tap, pkt_buf + n, ETH_MAX_MTU);
if (len == 0) {
--
@@ -1080,7 +1080,7 @@ static void tap_passt_input(struct ctx *c, const struct timespec *now)
do {
n = recv(c->fd_tap, pkt_buf + partial_len,
- TAP_BUF_BYTES - partial_len, MSG_DONTWAIT);
+ sizeof(pkt_buf) - partial_len, MSG_DONTWAIT);
} while ((n < 0) && errno == EINTR);
if (n < 0) {
@@ -1151,7 +1151,7 @@ static void tap_pasta_input(struct ctx *c, const struct timespec *now)
tap_flush_pools();
- for (n = 0; n <= (ssize_t)(TAP_BUF_BYTES - ETH_MAX_MTU); n += len) {
+ for (n = 0; n <= (ssize_t)(sizeof(pkt_buf) - ETH_MAX_MTU); n += len) {
len = read(c->fd_tap, pkt_buf + n, ETH_MAX_MTU);
if (len == 0) {
--
2.48.1
^ permalink raw reply related [flat|nested] 14+ messages in thread
* [PATCH 6/9] tap: Use explicit defines for maximum length of L2 frame
2025-03-11 6:03 [PATCH 0/9] Improve handling of MTU limits David Gibson
` (4 preceding siblings ...)
2025-03-11 6:03 ` [PATCH 5/9] packet: Remove redundant TAP_BUF_BYTES define David Gibson
@ 2025-03-11 6:03 ` David Gibson
2025-03-11 22:45 ` Stefano Brivio
2025-03-11 6:03 ` [PATCH 7/9] Simplify sizing of pkt_buf David Gibson
` (2 subsequent siblings)
8 siblings, 1 reply; 14+ messages in thread
From: David Gibson @ 2025-03-11 6:03 UTC (permalink / raw)
To: passt-dev, Stefano Brivio; +Cc: David Gibson, David Gibson
Currently in tap.c we (mostly) use ETH_MAX_MTU as the maximum length of
an L2 frame. This define comes from the kernel, but it's badly named and
used confusingly.
First, it doesn't really have anything to do with Ethernet, which has no
structural limit on frame lengths. It comes more from either a) IP which
imposes a 64k datagram limit or b) from internal buffers used in various
places in the kernel (and in passt).
Worse, MTU generally means the maximum size of the IP (L3) datagram which
may be transferred, _not_ counting the L2 headers. In the kernel
ETH_MAX_MTU is sometimes used that way, but sometimes seems to be used as
a maximum frame length, _including_ L2 headers. In tap.c we're mostly
using it in the second way.
Finally, each of our tap backends could have different limits on the frame
size imposed by the mechanisms they're using.
Start clearing up this confusion by replacing it in tap.c with new
L2_MAX_LEN_* defines which specifically refer to the maximum L2 frame
length for each backend.
Signed-off-by: David Gibson <dgibson@redhat.com>
---
tap.c | 18 ++++++++++++++----
tap.h | 25 +++++++++++++++++++++++++
2 files changed, 39 insertions(+), 4 deletions(-)
diff --git a/tap.c b/tap.c
index fb306e75..4840dcfa 100644
--- a/tap.c
+++ b/tap.c
@@ -62,6 +62,15 @@
#include "vhost_user.h"
#include "vu_common.h"
+/* Maximum allowed frame lengths (including L2 header) */
+
+static_assert(L2_MAX_LEN_PASTA <= PACKET_MAX_LEN,
+ "packet pool can't store maximum size pasta frame");
+static_assert(L2_MAX_LEN_PASST <= PACKET_MAX_LEN,
+ "packet pool can't store maximum size qemu socket frame");
+static_assert(L2_MAX_LEN_VU <= PACKET_MAX_LEN,
+ "packet pool can't store maximum size vhost-user frame");
+
/* IPv4 (plus ARP) and IPv6 message batches from tap/guest to IP handlers */
static PACKET_POOL_NOINIT(pool_tap4, TAP_MSGS, pkt_buf);
static PACKET_POOL_NOINIT(pool_tap6, TAP_MSGS, pkt_buf);
@@ -1097,7 +1106,8 @@ static void tap_passt_input(struct ctx *c, const struct timespec *now)
while (n >= (ssize_t)sizeof(uint32_t)) {
uint32_t l2len = ntohl_unaligned(p);
- if (l2len < sizeof(struct ethhdr) || l2len > ETH_MAX_MTU) {
+ if (l2len < sizeof(struct ethhdr) ||
+ l2len > L2_MAX_LEN_PASST) {
err("Bad frame size from guest, resetting connection");
tap_sock_reset(c);
return;
@@ -1151,8 +1161,8 @@ static void tap_pasta_input(struct ctx *c, const struct timespec *now)
tap_flush_pools();
- for (n = 0; n <= (ssize_t)(sizeof(pkt_buf) - ETH_MAX_MTU); n += len) {
- len = read(c->fd_tap, pkt_buf + n, ETH_MAX_MTU);
+ for (n = 0; n <= (ssize_t)(sizeof(pkt_buf) - L2_MAX_LEN_PASTA); n += len) {
+ len = read(c->fd_tap, pkt_buf + n, L2_MAX_LEN_PASTA);
if (len == 0) {
die("EOF on tap device, exiting");
@@ -1170,7 +1180,7 @@ static void tap_pasta_input(struct ctx *c, const struct timespec *now)
/* Ignore frames of bad length */
if (len < (ssize_t)sizeof(struct ethhdr) ||
- len > (ssize_t)ETH_MAX_MTU)
+ len > (ssize_t)L2_MAX_LEN_PASTA)
continue;
tap_add_packet(c, len, pkt_buf + n);
diff --git a/tap.h b/tap.h
index a2c3b87d..140e3305 100644
--- a/tap.h
+++ b/tap.h
@@ -6,6 +6,31 @@
#ifndef TAP_H
#define TAP_H
+/** L2_MAX_LEN_PASTA - Maximum frame length for pasta mode (with L2 header)
+ *
+ * The kernel tuntap device imposes a maximum frame size of 65535 including
+ * 'hard_header_len' (14 bytes for L2 Ethernet in the case of "tap" mode).
+ */
+#define L2_MAX_LEN_PASTA USHRT_MAX
+
+/** L2_MAX_LEN_PASST - Maximum frame length for passt mode (with L2 header)
+ *
+ * The only structural limit the Qemu socket protocol imposes on frames is
+ * (2^32-1) bytes, but that would be ludicrously long in practice. For now,
+ * limit it somewhat arbitrarily to 65535 bytes. FIXME: Work out an appropriate
+ * limit with more precision.
+ */
+#define L2_MAX_LEN_PASST USHRT_MAX
+
+/** L2_MAX_LEN_VU - Maximum frame length for vhost-user mode (with L2 header)
+ *
+ * VU allows multiple buffers per frame, each of which can be quite large, so
+ * the inherent frame size limit is rather large. Much larger than is actually
+ * useful for IP. For now limit arbitrarily to 65535 bytes. FIXME: Work out an
+ * appropriate limit with more precision.
+ */
+#define L2_MAX_LEN_VU USHRT_MAX
+
struct udphdr;
/**
--
@@ -6,6 +6,31 @@
#ifndef TAP_H
#define TAP_H
+/** L2_MAX_LEN_PASTA - Maximum frame length for pasta mode (with L2 header)
+ *
+ * The kernel tuntap device imposes a maximum frame size of 65535 including
+ * 'hard_header_len' (14 bytes for L2 Ethernet in the case of "tap" mode).
+ */
+#define L2_MAX_LEN_PASTA USHRT_MAX
+
+/** L2_MAX_LEN_PASST - Maximum frame length for passt mode (with L2 header)
+ *
+ * The only structural limit the Qemu socket protocol imposes on frames is
+ * (2^32-1) bytes, but that would be ludicrously long in practice. For now,
+ * limit it somewhat arbitrarily to 65535 bytes. FIXME: Work out an appropriate
+ * limit with more precision.
+ */
+#define L2_MAX_LEN_PASST USHRT_MAX
+
+/** L2_MAX_LEN_VU - Maximum frame length for vhost-user mode (with L2 header)
+ *
+ * VU allows multiple buffers per frame, each of which can be quite large, so
+ * the inherent frame size limit is rather large. Much larger than is actually
+ * useful for IP. For now limit arbitrarily to 65535 bytes. FIXME: Work out an
+ * appropriate limit with more precision.
+ */
+#define L2_MAX_LEN_VU USHRT_MAX
+
struct udphdr;
/**
--
2.48.1
^ permalink raw reply related [flat|nested] 14+ messages in thread
* Re: [PATCH 6/9] tap: Use explicit defines for maximum length of L2 frame
2025-03-11 6:03 ` [PATCH 6/9] tap: Use explicit defines for maximum length of L2 frame David Gibson
@ 2025-03-11 22:45 ` Stefano Brivio
2025-03-12 0:56 ` David Gibson
0 siblings, 1 reply; 14+ messages in thread
From: Stefano Brivio @ 2025-03-11 22:45 UTC (permalink / raw)
To: David Gibson; +Cc: passt-dev, David Gibson
Nits only (I can fix it all up on merge):
On Tue, 11 Mar 2025 17:03:15 +1100
David Gibson <david@gibson.dropbear.id.au> wrote:
> Currently in tap.c we (mostly) use ETH_MAX_MTU as the maximum length of
> an L2 frame. This define comes from the kernel, but it's badly named and
> used confusingly.
>
> First, it doesn't really have anything to do with Ethernet, which has no
> structural limit on frame lengths. It comes more from either a) IP which
> imposes a 64k datagram limit or b) from internal buffers used in various
> places in the kernel (and in passt).
>
> Worse, MTU generally means the maximum size of the IP (L3) datagram which
> may be transferred, _not_ counting the L2 headers. In the kernel
> ETH_MAX_MTU is sometimes used that way, but sometimes seems to be used as
> a maximum frame length, _including_ L2 headers. In tap.c we're mostly
> using it in the second way.
>
> Finally, each of our tap backends could have different limits on the frame
> size imposed by the mechanisms they're using.
>
> Start clearing up this confusion by replacing it in tap.c with new
> L2_MAX_LEN_* defines which specifically refer to the maximum L2 frame
> length for each backend.
>
> Signed-off-by: David Gibson <dgibson@redhat.com>
> ---
> tap.c | 18 ++++++++++++++----
> tap.h | 25 +++++++++++++++++++++++++
> 2 files changed, 39 insertions(+), 4 deletions(-)
>
> diff --git a/tap.c b/tap.c
> index fb306e75..4840dcfa 100644
> --- a/tap.c
> +++ b/tap.c
> @@ -62,6 +62,15 @@
> #include "vhost_user.h"
> #include "vu_common.h"
>
> +/* Maximum allowed frame lengths (including L2 header) */
> +
> +static_assert(L2_MAX_LEN_PASTA <= PACKET_MAX_LEN,
> + "packet pool can't store maximum size pasta frame");
> +static_assert(L2_MAX_LEN_PASST <= PACKET_MAX_LEN,
> + "packet pool can't store maximum size qemu socket frame");
> +static_assert(L2_MAX_LEN_VU <= PACKET_MAX_LEN,
> + "packet pool can't store maximum size vhost-user frame");
> +
> /* IPv4 (plus ARP) and IPv6 message batches from tap/guest to IP handlers */
> static PACKET_POOL_NOINIT(pool_tap4, TAP_MSGS, pkt_buf);
> static PACKET_POOL_NOINIT(pool_tap6, TAP_MSGS, pkt_buf);
> @@ -1097,7 +1106,8 @@ static void tap_passt_input(struct ctx *c, const struct timespec *now)
> while (n >= (ssize_t)sizeof(uint32_t)) {
> uint32_t l2len = ntohl_unaligned(p);
>
> - if (l2len < sizeof(struct ethhdr) || l2len > ETH_MAX_MTU) {
> + if (l2len < sizeof(struct ethhdr) ||
> + l2len > L2_MAX_LEN_PASST) {
No need to wrap this.
> err("Bad frame size from guest, resetting connection");
> tap_sock_reset(c);
> return;
> @@ -1151,8 +1161,8 @@ static void tap_pasta_input(struct ctx *c, const struct timespec *now)
>
> tap_flush_pools();
>
> - for (n = 0; n <= (ssize_t)(sizeof(pkt_buf) - ETH_MAX_MTU); n += len) {
> - len = read(c->fd_tap, pkt_buf + n, ETH_MAX_MTU);
> + for (n = 0; n <= (ssize_t)(sizeof(pkt_buf) - L2_MAX_LEN_PASTA); n += len) {
n += len should go on its own line now.
> + len = read(c->fd_tap, pkt_buf + n, L2_MAX_LEN_PASTA);
>
> if (len == 0) {
> die("EOF on tap device, exiting");
> @@ -1170,7 +1180,7 @@ static void tap_pasta_input(struct ctx *c, const struct timespec *now)
>
> /* Ignore frames of bad length */
> if (len < (ssize_t)sizeof(struct ethhdr) ||
> - len > (ssize_t)ETH_MAX_MTU)
> + len > (ssize_t)L2_MAX_LEN_PASTA)
> continue;
>
> tap_add_packet(c, len, pkt_buf + n);
> diff --git a/tap.h b/tap.h
> index a2c3b87d..140e3305 100644
> --- a/tap.h
> +++ b/tap.h
> @@ -6,6 +6,31 @@
> #ifndef TAP_H
> #define TAP_H
>
> +/** L2_MAX_LEN_PASTA - Maximum frame length for pasta mode (with L2 header)
> + *
> + * The kernel tuntap device imposes a maximum frame size of 65535 including
> + * 'hard_header_len' (14 bytes for L2 Ethernet in the case of "tap" mode).
Extra whitespaces in indentation.
> + */
> +#define L2_MAX_LEN_PASTA USHRT_MAX
> +
> +/** L2_MAX_LEN_PASST - Maximum frame length for passt mode (with L2 header)
> + *
> + * The only structural limit the Qemu socket protocol imposes on frames is
QEMU
> + * (2^32-1) bytes, but that would be ludicrously long in practice. For now,
> + * limit it somewhat arbitrarily to 65535 bytes. FIXME: Work out an appropriate
> + * limit with more precision.
> + */
> +#define L2_MAX_LEN_PASST USHRT_MAX
> +
> +/** L2_MAX_LEN_VU - Maximum frame length for vhost-user mode (with L2 header)
> + *
> + * VU allows multiple buffers per frame, each of which can be quite large, so
vhost-user
> + * the inherent frame size limit is rather large. Much larger than is actually
> + * useful for IP. For now limit arbitrarily to 65535 bytes. FIXME: Work out an
> + * appropriate limit with more precision.
> + */
> +#define L2_MAX_LEN_VU USHRT_MAX
> +
> struct udphdr;
>
> /**
--
Stefano
^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [PATCH 6/9] tap: Use explicit defines for maximum length of L2 frame
2025-03-11 22:45 ` Stefano Brivio
@ 2025-03-12 0:56 ` David Gibson
0 siblings, 0 replies; 14+ messages in thread
From: David Gibson @ 2025-03-12 0:56 UTC (permalink / raw)
To: Stefano Brivio; +Cc: passt-dev, David Gibson
[-- Attachment #1: Type: text/plain, Size: 835 bytes --]
On Tue, Mar 11, 2025 at 11:45:09PM +0100, Stefano Brivio wrote:
> Nits only (I can fix it all up on merge):
Actually, I spotted one other small change I'd like to make here, so I
might as well respin.
> On Tue, 11 Mar 2025 17:03:15 +1100
> David Gibson <david@gibson.dropbear.id.au> wrote:
[snip]
> > + */
> > +#define L2_MAX_LEN_PASTA USHRT_MAX
> > +
> > +/** L2_MAX_LEN_PASST - Maximum frame length for passt mode (with L2 header)
> > + *
> > + * The only structural limit the Qemu socket protocol imposes on frames is
>
> QEMU
For some reason, the standard style for capitalizing QEMU never sticks
in my head.
--
David Gibson (he or they) | I'll have my music baroque, and my code
david AT gibson.dropbear.id.au | minimalist, thank you, not the other way
| around.
http://www.ozlabs.org/~dgibson
[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]
^ permalink raw reply [flat|nested] 14+ messages in thread
* [PATCH 7/9] Simplify sizing of pkt_buf
2025-03-11 6:03 [PATCH 0/9] Improve handling of MTU limits David Gibson
` (5 preceding siblings ...)
2025-03-11 6:03 ` [PATCH 6/9] tap: Use explicit defines for maximum length of L2 frame David Gibson
@ 2025-03-11 6:03 ` David Gibson
2025-03-11 6:03 ` [PATCH 8/9] pcap: Correctly set snaplen based on tap backend type David Gibson
2025-03-11 6:03 ` [PATCH 9/9] conf: Limit maximum MTU based on backend frame size David Gibson
8 siblings, 0 replies; 14+ messages in thread
From: David Gibson @ 2025-03-11 6:03 UTC (permalink / raw)
To: passt-dev, Stefano Brivio; +Cc: David Gibson
We define the size of pkt_buf as large enough to hold 128 maximum size
packets. Well, approximately, since we round down to the page size. We
don't have any specific reliance on how many packets can fit in the buffer,
we just want it to be big enough to allow reasonable batching. The
current definition relies on the confusingly named ETH_MAX_MTU and adds
in sizeof(uint32_t) rather non-obviously for the pseudo-physical header
used by the qemu socket (passt mode) protocol.
Instead, just define it to be 8MiB, which is what that complex calculation
works out to.
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
---
passt.h | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/passt.h b/passt.h
index 6b248051..8f450912 100644
--- a/passt.h
+++ b/passt.h
@@ -69,8 +69,8 @@ union epoll_ref {
static_assert(sizeof(union epoll_ref) <= sizeof(union epoll_data),
"epoll_ref must have same size as epoll_data");
-#define PKT_BUF_BYTES \
- ROUND_DOWN(((ETH_MAX_MTU + sizeof(uint32_t)) * 128), PAGE_SIZE)
+/* Large enough for ~128 maximum size frames */
+#define PKT_BUF_BYTES (8UL << 20)
#define TAP_MSGS \
DIV_ROUND_UP(PKT_BUF_BYTES, ETH_ZLEN - 2 * ETH_ALEN + sizeof(uint32_t))
--
@@ -69,8 +69,8 @@ union epoll_ref {
static_assert(sizeof(union epoll_ref) <= sizeof(union epoll_data),
"epoll_ref must have same size as epoll_data");
-#define PKT_BUF_BYTES \
- ROUND_DOWN(((ETH_MAX_MTU + sizeof(uint32_t)) * 128), PAGE_SIZE)
+/* Large enough for ~128 maximum size frames */
+#define PKT_BUF_BYTES (8UL << 20)
#define TAP_MSGS \
DIV_ROUND_UP(PKT_BUF_BYTES, ETH_ZLEN - 2 * ETH_ALEN + sizeof(uint32_t))
--
2.48.1
^ permalink raw reply related [flat|nested] 14+ messages in thread
* [PATCH 8/9] pcap: Correctly set snaplen based on tap backend type
2025-03-11 6:03 [PATCH 0/9] Improve handling of MTU limits David Gibson
` (6 preceding siblings ...)
2025-03-11 6:03 ` [PATCH 7/9] Simplify sizing of pkt_buf David Gibson
@ 2025-03-11 6:03 ` David Gibson
2025-03-11 6:03 ` [PATCH 9/9] conf: Limit maximum MTU based on backend frame size David Gibson
8 siblings, 0 replies; 14+ messages in thread
From: David Gibson @ 2025-03-11 6:03 UTC (permalink / raw)
To: passt-dev, Stefano Brivio; +Cc: David Gibson
The pcap header includes a value indicating how much of each frame is
captured. We always capture the entire frame, so we want to set this to
the maximum possible frame size. Currently we do that by setting it to
ETH_MAX_MTU, but that's a confusingly named constant which might not always
be correct depending on the details of our tap backend.
Instead add a tap_l2_max_len() function that explicitly returns the maximum
frame size for the current mode and use that to set snaplen. While we're
there, there's no particular need for the pcap header to be defined in a
global; make it local to pcap_init() instead.
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
---
pcap.c | 46 ++++++++++++++++++++++++----------------------
tap.c | 19 +++++++++++++++++++
tap.h | 1 +
3 files changed, 44 insertions(+), 22 deletions(-)
diff --git a/pcap.c b/pcap.c
index 3d623cfe..e95aa6fe 100644
--- a/pcap.c
+++ b/pcap.c
@@ -33,33 +33,12 @@
#include "log.h"
#include "pcap.h"
#include "iov.h"
+#include "tap.h"
#define PCAP_VERSION_MINOR 4
static int pcap_fd = -1;
-/* See pcap.h from libpcap, or pcap-savefile(5) */
-static const struct {
- uint32_t magic;
-#define PCAP_MAGIC 0xa1b2c3d4
-
- uint16_t major;
-#define PCAP_VERSION_MAJOR 2
-
- uint16_t minor;
-#define PCAP_VERSION_MINOR 4
-
- int32_t thiszone;
- uint32_t sigfigs;
- uint32_t snaplen;
-
- uint32_t linktype;
-#define PCAP_LINKTYPE_ETHERNET 1
-} pcap_hdr = {
- PCAP_MAGIC, PCAP_VERSION_MAJOR, PCAP_VERSION_MINOR, 0, 0, ETH_MAX_MTU,
- PCAP_LINKTYPE_ETHERNET
-};
-
struct pcap_pkthdr {
uint32_t tv_sec;
uint32_t tv_usec;
@@ -162,6 +141,29 @@ void pcap_iov(const struct iovec *iov, size_t iovcnt, size_t offset)
*/
void pcap_init(struct ctx *c)
{
+ /* See pcap.h from libpcap, or pcap-savefile(5) */
+#define PCAP_MAGIC 0xa1b2c3d4
+#define PCAP_VERSION_MAJOR 2
+#define PCAP_VERSION_MINOR 4
+#define PCAP_LINKTYPE_ETHERNET 1
+ const struct {
+ uint32_t magic;
+ uint16_t major;
+ uint16_t minor;
+
+ int32_t thiszone;
+ uint32_t sigfigs;
+ uint32_t snaplen;
+
+ uint32_t linktype;
+ } pcap_hdr = {
+ .magic = PCAP_MAGIC,
+ .major = PCAP_VERSION_MAJOR,
+ .minor = PCAP_VERSION_MINOR,
+ .snaplen = tap_l2_max_len(c),
+ .linktype = PCAP_LINKTYPE_ETHERNET
+ };
+
if (pcap_fd != -1)
return;
diff --git a/tap.c b/tap.c
index 4840dcfa..7cace71a 100644
--- a/tap.c
+++ b/tap.c
@@ -78,6 +78,25 @@ static PACKET_POOL_NOINIT(pool_tap6, TAP_MSGS, pkt_buf);
#define TAP_SEQS 128 /* Different L4 tuples in one batch */
#define FRAGMENT_MSG_RATE 10 /* # seconds between fragment warnings */
+/**
+ * tap_l2_max_len() - Maximum frame size (including L2 header) for current mode
+ * @c: Execution context
+ */
+unsigned long tap_l2_max_len(const struct ctx *c)
+{
+ /* NOLINTBEGIN(bugprone-branch-clone): values can be the same */
+ switch (c->mode) {
+ case MODE_PASST:
+ return L2_MAX_LEN_PASST;
+ case MODE_PASTA:
+ return L2_MAX_LEN_PASTA;
+ case MODE_VU:
+ return L2_MAX_LEN_VU;
+ }
+ /* NOLINTEND(bugprone-branch-clone) */
+ ASSERT(0);
+}
+
/**
* tap_send_single() - Send a single frame
* @c: Execution context
diff --git a/tap.h b/tap.h
index 140e3305..2d2221e2 100644
--- a/tap.h
+++ b/tap.h
@@ -69,6 +69,7 @@ static inline void tap_hdr_update(struct tap_hdr *thdr, size_t l2len)
thdr->vnet_len = htonl(l2len);
}
+unsigned long tap_l2_max_len(const struct ctx *c);
void *tap_push_l2h(const struct ctx *c, void *buf, uint16_t proto);
void *tap_push_ip4h(struct iphdr *ip4h, struct in_addr src,
struct in_addr dst, size_t l4len, uint8_t proto);
--
@@ -69,6 +69,7 @@ static inline void tap_hdr_update(struct tap_hdr *thdr, size_t l2len)
thdr->vnet_len = htonl(l2len);
}
+unsigned long tap_l2_max_len(const struct ctx *c);
void *tap_push_l2h(const struct ctx *c, void *buf, uint16_t proto);
void *tap_push_ip4h(struct iphdr *ip4h, struct in_addr src,
struct in_addr dst, size_t l4len, uint8_t proto);
--
2.48.1
^ permalink raw reply related [flat|nested] 14+ messages in thread
* [PATCH 9/9] conf: Limit maximum MTU based on backend frame size
2025-03-11 6:03 [PATCH 0/9] Improve handling of MTU limits David Gibson
` (7 preceding siblings ...)
2025-03-11 6:03 ` [PATCH 8/9] pcap: Correctly set snaplen based on tap backend type David Gibson
@ 2025-03-11 6:03 ` David Gibson
8 siblings, 0 replies; 14+ messages in thread
From: David Gibson @ 2025-03-11 6:03 UTC (permalink / raw)
To: passt-dev, Stefano Brivio; +Cc: David Gibson
The -m option controls the MTU, that is the maximum transmissible L3
datagram, not including L2 headers. We currently limit it to ETH_MAX_MTU
which sounds like it makes sense. But ETH_MAX_MTU is confusing: it's not
consistently used as to whether it means the maximum L3 datagram size or
the maximum L2 frame size. Even within conf() we explicitly account for
the L2 header size when computing the default --mtu value, but not when
we compute the maximum --mtu value.
Clean this up by reworking the maximum MTU computation to be the minimum of
IP_MAX_MTU (65535) and the maximum sized IP datagram which can fit into
our L2 frames when we account for the L2 header. The latter can vary
depending on our tap backend, although it doesn't right now.
Link: https://bugs.passt.top/show_bug.cgi?id=66
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
---
conf.c | 11 +++++++----
util.h | 3 ---
2 files changed, 7 insertions(+), 7 deletions(-)
diff --git a/conf.c b/conf.c
index b58e2a6e..c760f791 100644
--- a/conf.c
+++ b/conf.c
@@ -1434,6 +1434,7 @@ void conf(struct ctx *c, int argc, char **argv)
enum fwd_ports_mode fwd_default = FWD_NONE;
bool v4_only = false, v6_only = false;
unsigned dns4_idx = 0, dns6_idx = 0;
+ unsigned long max_mtu = IP_MAX_MTU;
struct fqdn *dnss = c->dns_search;
unsigned int ifi4 = 0, ifi6 = 0;
const char *logfile = NULL;
@@ -1449,7 +1450,9 @@ void conf(struct ctx *c, int argc, char **argv)
fwd_default = FWD_AUTO;
}
- c->mtu = ROUND_DOWN(ETH_MAX_MTU - ETH_HLEN, sizeof(uint32_t));
+ if (tap_l2_max_len(c) - ETH_HLEN < max_mtu)
+ max_mtu = tap_l2_max_len(c) - ETH_HLEN;
+ c->mtu = ROUND_DOWN(max_mtu, sizeof(uint32_t));
c->tcp.fwd_in.mode = c->tcp.fwd_out.mode = FWD_UNSET;
c->udp.fwd_in.mode = c->udp.fwd_out.mode = FWD_UNSET;
memcpy(c->our_tap_mac, MAC_OUR_LAA, ETH_ALEN);
@@ -1711,9 +1714,9 @@ void conf(struct ctx *c, int argc, char **argv)
if (errno || *e)
die("Invalid MTU: %s", optarg);
- if (mtu > ETH_MAX_MTU) {
- die("MTU %lu too large (max %u)",
- mtu, ETH_MAX_MTU);
+ if (mtu > max_mtu) {
+ die("MTU %lu too large (max %lu)",
+ mtu, max_mtu);
}
c->mtu = mtu;
diff --git a/util.h b/util.h
index 0f70f4d4..4d512fab 100644
--- a/util.h
+++ b/util.h
@@ -31,9 +31,6 @@
#ifndef SECCOMP_RET_KILL_PROCESS
#define SECCOMP_RET_KILL_PROCESS SECCOMP_RET_KILL
#endif
-#ifndef ETH_MAX_MTU
-#define ETH_MAX_MTU USHRT_MAX
-#endif
#ifndef IP_MAX_MTU
#define IP_MAX_MTU USHRT_MAX
#endif
--
@@ -31,9 +31,6 @@
#ifndef SECCOMP_RET_KILL_PROCESS
#define SECCOMP_RET_KILL_PROCESS SECCOMP_RET_KILL
#endif
-#ifndef ETH_MAX_MTU
-#define ETH_MAX_MTU USHRT_MAX
-#endif
#ifndef IP_MAX_MTU
#define IP_MAX_MTU USHRT_MAX
#endif
--
2.48.1
^ permalink raw reply related [flat|nested] 14+ messages in thread