From: Stefano Brivio <sbrivio@redhat.com>
To: passt-dev@passt.top
Subject: [PATCH 11/18] tcp, udp: Receive batching doesn't pay off when writing single frames to tap
Date: Tue, 22 Feb 2022 02:34:27 +0100 [thread overview]
Message-ID: <20220222013434.4116044-12-sbrivio@redhat.com> (raw)
In-Reply-To: <20220222013434.4116044-1-sbrivio@redhat.com>
[-- Attachment #1: Type: text/plain, Size: 8353 bytes --]
In pasta mode, when we get data from sockets and write it as single
frames to the tap device, we batch receive operations considerably,
and then (conceptually) split the data into many smaller writes.
It looked like an obvious choice, but performance is actually better
if we receive data in many small, frame-sized recvmsg()/recvmmsg()
calls.
The syscall overhead with the previous behaviour, observed by perf,
comes predominantly from write operations, but receiving data in
shorter chunks probably improves cache locality by a considerable
amount.
Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
---
tcp.c | 36 ++++++++++++++++++++----------------
udp.c | 33 +++++++++++++++++----------------
2 files changed, 37 insertions(+), 32 deletions(-)
diff --git a/tcp.c b/tcp.c
index e4fac22..a3a9dfd 100644
--- a/tcp.c
+++ b/tcp.c
@@ -343,7 +343,9 @@
#define MAX_TAP_CONNS (128 * 1024)
#define MAX_SPLICE_CONNS (128 * 1024)
-#define TCP_TAP_FRAMES 256
+#define TCP_TAP_FRAMES_MEM 256
+#define TCP_TAP_FRAMES \
+ (c->mode == MODE_PASST ? TCP_TAP_FRAMES_MEM : 1)
#define MAX_PIPE_SIZE (2UL * 1024 * 1024)
@@ -609,7 +611,7 @@ static struct tcp4_l2_buf_t {
#else
} __attribute__ ((packed, aligned(__alignof__(unsigned int))))
#endif
-tcp4_l2_buf[TCP_TAP_FRAMES];
+tcp4_l2_buf[TCP_TAP_FRAMES_MEM];
static unsigned int tcp4_l2_buf_used;
static size_t tcp4_l2_buf_bytes;
@@ -640,21 +642,21 @@ struct tcp6_l2_buf_t {
#else
} __attribute__ ((packed, aligned(__alignof__(unsigned int))))
#endif
-tcp6_l2_buf[TCP_TAP_FRAMES];
+tcp6_l2_buf[TCP_TAP_FRAMES_MEM];
static unsigned int tcp6_l2_buf_used;
static size_t tcp6_l2_buf_bytes;
/* recvmsg()/sendmsg() data for tap */
static char tcp_buf_discard [MAX_WINDOW];
-static struct iovec iov_sock [TCP_TAP_FRAMES + 1];
+static struct iovec iov_sock [TCP_TAP_FRAMES_MEM + 1];
-static struct iovec tcp4_l2_iov_tap [TCP_TAP_FRAMES];
-static struct iovec tcp6_l2_iov_tap [TCP_TAP_FRAMES];
-static struct iovec tcp4_l2_flags_iov_tap [TCP_TAP_FRAMES];
-static struct iovec tcp6_l2_flags_iov_tap [TCP_TAP_FRAMES];
+static struct iovec tcp4_l2_iov_tap [TCP_TAP_FRAMES_MEM];
+static struct iovec tcp6_l2_iov_tap [TCP_TAP_FRAMES_MEM];
+static struct iovec tcp4_l2_flags_iov_tap [TCP_TAP_FRAMES_MEM];
+static struct iovec tcp6_l2_flags_iov_tap [TCP_TAP_FRAMES_MEM];
-static struct mmsghdr tcp_l2_mh_tap [TCP_TAP_FRAMES];
+static struct mmsghdr tcp_l2_mh_tap [TCP_TAP_FRAMES_MEM];
/* sendmsg() to socket */
static struct iovec tcp_tap_iov [UIO_MAXIOV];
@@ -688,7 +690,7 @@ static struct tcp4_l2_flags_buf_t {
#else
} __attribute__ ((packed, aligned(__alignof__(unsigned int))))
#endif
-tcp4_l2_flags_buf[TCP_TAP_FRAMES];
+tcp4_l2_flags_buf[TCP_TAP_FRAMES_MEM];
static int tcp4_l2_flags_buf_used;
@@ -717,7 +719,7 @@ static struct tcp6_l2_flags_buf_t {
#else
} __attribute__ ((packed, aligned(__alignof__(unsigned int))))
#endif
-tcp6_l2_flags_buf[TCP_TAP_FRAMES];
+tcp6_l2_flags_buf[TCP_TAP_FRAMES_MEM];
static int tcp6_l2_flags_buf_used;
@@ -916,7 +918,7 @@ void tcp_update_l2_buf(unsigned char *eth_d, unsigned char *eth_s,
{
int i;
- for (i = 0; i < TCP_TAP_FRAMES; i++) {
+ for (i = 0; i < TCP_TAP_FRAMES_MEM; i++) {
struct tcp4_l2_flags_buf_t *b4f = &tcp4_l2_flags_buf[i];
struct tcp6_l2_flags_buf_t *b6f = &tcp6_l2_flags_buf[i];
struct tcp4_l2_buf_t *b4 = &tcp4_l2_buf[i];
@@ -982,12 +984,13 @@ static void tcp_sock4_iov_init(void)
};
}
- for (i = 0, iov = tcp4_l2_iov_tap; i < TCP_TAP_FRAMES; i++, iov++) {
+ for (i = 0, iov = tcp4_l2_iov_tap; i < TCP_TAP_FRAMES_MEM; i++, iov++) {
iov->iov_base = &tcp4_l2_buf[i].vnet_len;
iov->iov_len = MSS_DEFAULT;
}
- for (i = 0, iov = tcp4_l2_flags_iov_tap; i < TCP_TAP_FRAMES; i++, iov++)
+ for (i = 0, iov = tcp4_l2_flags_iov_tap; i < TCP_TAP_FRAMES_MEM;
+ i++, iov++)
iov->iov_base = &tcp4_l2_flags_buf[i].vnet_len;
}
@@ -1015,12 +1018,13 @@ static void tcp_sock6_iov_init(void)
};
}
- for (i = 0, iov = tcp6_l2_iov_tap; i < TCP_TAP_FRAMES; i++, iov++) {
+ for (i = 0, iov = tcp6_l2_iov_tap; i < TCP_TAP_FRAMES_MEM; i++, iov++) {
iov->iov_base = &tcp6_l2_buf[i].vnet_len;
iov->iov_len = MSS_DEFAULT;
}
- for (i = 0, iov = tcp6_l2_flags_iov_tap; i < TCP_TAP_FRAMES; i++, iov++)
+ for (i = 0, iov = tcp6_l2_flags_iov_tap; i < TCP_TAP_FRAMES_MEM;
+ i++, iov++)
iov->iov_base = &tcp6_l2_flags_buf[i].vnet_len;
}
diff --git a/udp.c b/udp.c
index 8129a89..d4f3714 100644
--- a/udp.c
+++ b/udp.c
@@ -118,7 +118,8 @@
#define UDP_CONN_TIMEOUT 180 /* s, timeout for ephemeral or local bind */
#define UDP_SPLICE_FRAMES 128
-#define UDP_TAP_FRAMES 128
+#define UDP_TAP_FRAMES_MEM 128
+#define UDP_TAP_FRAMES (c->mode == MODE_PASST ? UDP_TAP_FRAMES_MEM : 1)
/**
* struct udp_tap_port - Port tracking based on tap-facing source port
@@ -204,7 +205,7 @@ static struct udp4_l2_buf_t {
uint8_t data[USHRT_MAX -
(sizeof(struct iphdr) + sizeof(struct udphdr))];
} __attribute__ ((packed, aligned(__alignof__(unsigned int))))
-udp4_l2_buf[UDP_TAP_FRAMES];
+udp4_l2_buf[UDP_TAP_FRAMES_MEM];
/**
* udp6_l2_buf_t - Pre-cooked IPv6 packet buffers for tap connections
@@ -234,23 +235,23 @@ struct udp6_l2_buf_t {
#else
} __attribute__ ((packed, aligned(__alignof__(unsigned int))))
#endif
-udp6_l2_buf[UDP_TAP_FRAMES];
+udp6_l2_buf[UDP_TAP_FRAMES_MEM];
static struct sockaddr_storage udp_splice_namebuf;
static uint8_t udp_splice_buf[UDP_SPLICE_FRAMES][USHRT_MAX];
/* recvmmsg()/sendmmsg() data for tap */
-static struct iovec udp4_l2_iov_sock [UDP_TAP_FRAMES];
-static struct iovec udp6_l2_iov_sock [UDP_TAP_FRAMES];
+static struct iovec udp4_l2_iov_sock [UDP_TAP_FRAMES_MEM];
+static struct iovec udp6_l2_iov_sock [UDP_TAP_FRAMES_MEM];
-static struct iovec udp4_l2_iov_tap [UDP_TAP_FRAMES];
-static struct iovec udp6_l2_iov_tap [UDP_TAP_FRAMES];
+static struct iovec udp4_l2_iov_tap [UDP_TAP_FRAMES_MEM];
+static struct iovec udp6_l2_iov_tap [UDP_TAP_FRAMES_MEM];
-static struct mmsghdr udp4_l2_mh_sock [UDP_TAP_FRAMES];
-static struct mmsghdr udp6_l2_mh_sock [UDP_TAP_FRAMES];
+static struct mmsghdr udp4_l2_mh_sock [UDP_TAP_FRAMES_MEM];
+static struct mmsghdr udp6_l2_mh_sock [UDP_TAP_FRAMES_MEM];
-static struct mmsghdr udp4_l2_mh_tap [UDP_TAP_FRAMES];
-static struct mmsghdr udp6_l2_mh_tap [UDP_TAP_FRAMES];
+static struct mmsghdr udp4_l2_mh_tap [UDP_TAP_FRAMES_MEM];
+static struct mmsghdr udp6_l2_mh_tap [UDP_TAP_FRAMES_MEM];
/* recvmmsg()/sendmmsg() data for "spliced" connections */
static struct iovec udp_splice_iov_recv [UDP_SPLICE_FRAMES];
@@ -310,7 +311,7 @@ void udp_update_l2_buf(unsigned char *eth_d, unsigned char *eth_s,
{
int i;
- for (i = 0; i < UDP_TAP_FRAMES; i++) {
+ for (i = 0; i < UDP_TAP_FRAMES_MEM; i++) {
struct udp4_l2_buf_t *b4 = &udp4_l2_buf[i];
struct udp6_l2_buf_t *b6 = &udp6_l2_buf[i];
@@ -354,7 +355,7 @@ static void udp_sock4_iov_init(void)
};
}
- for (i = 0, h = udp4_l2_mh_sock; i < UDP_TAP_FRAMES; i++, h++) {
+ for (i = 0, h = udp4_l2_mh_sock; i < UDP_TAP_FRAMES_MEM; i++, h++) {
struct msghdr *mh = &h->msg_hdr;
mh->msg_name = &udp4_l2_buf[i].s_in;
@@ -366,7 +367,7 @@ static void udp_sock4_iov_init(void)
mh->msg_iovlen = 1;
}
- for (i = 0, h = udp4_l2_mh_tap; i < UDP_TAP_FRAMES; i++, h++) {
+ for (i = 0, h = udp4_l2_mh_tap; i < UDP_TAP_FRAMES_MEM; i++, h++) {
struct msghdr *mh = &h->msg_hdr;
udp4_l2_iov_tap[i].iov_base = &udp4_l2_buf[i].vnet_len;
@@ -394,7 +395,7 @@ static void udp_sock6_iov_init(void)
};
}
- for (i = 0, h = udp6_l2_mh_sock; i < UDP_TAP_FRAMES; i++, h++) {
+ for (i = 0, h = udp6_l2_mh_sock; i < UDP_TAP_FRAMES_MEM; i++, h++) {
struct msghdr *mh = &h->msg_hdr;
mh->msg_name = &udp6_l2_buf[i].s_in6;
@@ -406,7 +407,7 @@ static void udp_sock6_iov_init(void)
mh->msg_iovlen = 1;
}
- for (i = 0, h = udp6_l2_mh_tap; i < UDP_TAP_FRAMES; i++, h++) {
+ for (i = 0, h = udp6_l2_mh_tap; i < UDP_TAP_FRAMES_MEM; i++, h++) {
struct msghdr *mh = &h->msg_hdr;
udp6_l2_iov_tap[i].iov_base = &udp6_l2_buf[i].vnet_len;
--
@@ -118,7 +118,8 @@
#define UDP_CONN_TIMEOUT 180 /* s, timeout for ephemeral or local bind */
#define UDP_SPLICE_FRAMES 128
-#define UDP_TAP_FRAMES 128
+#define UDP_TAP_FRAMES_MEM 128
+#define UDP_TAP_FRAMES (c->mode == MODE_PASST ? UDP_TAP_FRAMES_MEM : 1)
/**
* struct udp_tap_port - Port tracking based on tap-facing source port
@@ -204,7 +205,7 @@ static struct udp4_l2_buf_t {
uint8_t data[USHRT_MAX -
(sizeof(struct iphdr) + sizeof(struct udphdr))];
} __attribute__ ((packed, aligned(__alignof__(unsigned int))))
-udp4_l2_buf[UDP_TAP_FRAMES];
+udp4_l2_buf[UDP_TAP_FRAMES_MEM];
/**
* udp6_l2_buf_t - Pre-cooked IPv6 packet buffers for tap connections
@@ -234,23 +235,23 @@ struct udp6_l2_buf_t {
#else
} __attribute__ ((packed, aligned(__alignof__(unsigned int))))
#endif
-udp6_l2_buf[UDP_TAP_FRAMES];
+udp6_l2_buf[UDP_TAP_FRAMES_MEM];
static struct sockaddr_storage udp_splice_namebuf;
static uint8_t udp_splice_buf[UDP_SPLICE_FRAMES][USHRT_MAX];
/* recvmmsg()/sendmmsg() data for tap */
-static struct iovec udp4_l2_iov_sock [UDP_TAP_FRAMES];
-static struct iovec udp6_l2_iov_sock [UDP_TAP_FRAMES];
+static struct iovec udp4_l2_iov_sock [UDP_TAP_FRAMES_MEM];
+static struct iovec udp6_l2_iov_sock [UDP_TAP_FRAMES_MEM];
-static struct iovec udp4_l2_iov_tap [UDP_TAP_FRAMES];
-static struct iovec udp6_l2_iov_tap [UDP_TAP_FRAMES];
+static struct iovec udp4_l2_iov_tap [UDP_TAP_FRAMES_MEM];
+static struct iovec udp6_l2_iov_tap [UDP_TAP_FRAMES_MEM];
-static struct mmsghdr udp4_l2_mh_sock [UDP_TAP_FRAMES];
-static struct mmsghdr udp6_l2_mh_sock [UDP_TAP_FRAMES];
+static struct mmsghdr udp4_l2_mh_sock [UDP_TAP_FRAMES_MEM];
+static struct mmsghdr udp6_l2_mh_sock [UDP_TAP_FRAMES_MEM];
-static struct mmsghdr udp4_l2_mh_tap [UDP_TAP_FRAMES];
-static struct mmsghdr udp6_l2_mh_tap [UDP_TAP_FRAMES];
+static struct mmsghdr udp4_l2_mh_tap [UDP_TAP_FRAMES_MEM];
+static struct mmsghdr udp6_l2_mh_tap [UDP_TAP_FRAMES_MEM];
/* recvmmsg()/sendmmsg() data for "spliced" connections */
static struct iovec udp_splice_iov_recv [UDP_SPLICE_FRAMES];
@@ -310,7 +311,7 @@ void udp_update_l2_buf(unsigned char *eth_d, unsigned char *eth_s,
{
int i;
- for (i = 0; i < UDP_TAP_FRAMES; i++) {
+ for (i = 0; i < UDP_TAP_FRAMES_MEM; i++) {
struct udp4_l2_buf_t *b4 = &udp4_l2_buf[i];
struct udp6_l2_buf_t *b6 = &udp6_l2_buf[i];
@@ -354,7 +355,7 @@ static void udp_sock4_iov_init(void)
};
}
- for (i = 0, h = udp4_l2_mh_sock; i < UDP_TAP_FRAMES; i++, h++) {
+ for (i = 0, h = udp4_l2_mh_sock; i < UDP_TAP_FRAMES_MEM; i++, h++) {
struct msghdr *mh = &h->msg_hdr;
mh->msg_name = &udp4_l2_buf[i].s_in;
@@ -366,7 +367,7 @@ static void udp_sock4_iov_init(void)
mh->msg_iovlen = 1;
}
- for (i = 0, h = udp4_l2_mh_tap; i < UDP_TAP_FRAMES; i++, h++) {
+ for (i = 0, h = udp4_l2_mh_tap; i < UDP_TAP_FRAMES_MEM; i++, h++) {
struct msghdr *mh = &h->msg_hdr;
udp4_l2_iov_tap[i].iov_base = &udp4_l2_buf[i].vnet_len;
@@ -394,7 +395,7 @@ static void udp_sock6_iov_init(void)
};
}
- for (i = 0, h = udp6_l2_mh_sock; i < UDP_TAP_FRAMES; i++, h++) {
+ for (i = 0, h = udp6_l2_mh_sock; i < UDP_TAP_FRAMES_MEM; i++, h++) {
struct msghdr *mh = &h->msg_hdr;
mh->msg_name = &udp6_l2_buf[i].s_in6;
@@ -406,7 +407,7 @@ static void udp_sock6_iov_init(void)
mh->msg_iovlen = 1;
}
- for (i = 0, h = udp6_l2_mh_tap; i < UDP_TAP_FRAMES; i++, h++) {
+ for (i = 0, h = udp6_l2_mh_tap; i < UDP_TAP_FRAMES_MEM; i++, h++) {
struct msghdr *mh = &h->msg_hdr;
udp6_l2_iov_tap[i].iov_base = &udp6_l2_buf[i].vnet_len;
--
2.34.1
next prev parent reply other threads:[~2022-02-22 1:34 UTC|newest]
Thread overview: 20+ messages / expand[flat|nested] mbox.gz Atom feed top
2022-02-22 1:34 [PATCH 00/18] slirp4netns, sandboxing, Podman integration, assorted fixes Stefano Brivio
2022-02-22 1:34 ` [PATCH 01/18] slirp4netns: Look up pasta command, exit if not found Stefano Brivio
2022-02-22 1:34 ` [PATCH 02/18] slirp4netns: Add EXIT as condition for trap Stefano Brivio
2022-02-22 1:34 ` [PATCH 03/18] passt, pasta: Namespace-based sandboxing, defer seccomp policy application Stefano Brivio
2022-02-22 1:34 ` [PATCH 04/18] passt: Make process not dumpable after sandboxing Stefano Brivio
2022-02-22 1:34 ` [PATCH 05/18] Makefile, conf, passt: Drop passt4netns references, explicit argc check Stefano Brivio
2022-02-22 1:34 ` [PATCH 06/18] slirp4netns.sh: Implement API socket option for port forwarding Stefano Brivio
2022-02-22 1:34 ` [PATCH 07/18] conf: Don't print configuration on --quiet Stefano Brivio
2022-02-22 1:34 ` [PATCH 08/18] conf: Given IPv4 address and no netmask, assign RFC 790-style classes Stefano Brivio
2022-02-22 1:34 ` [PATCH 09/18] conf, udp: Introduce basic DNS forwarding Stefano Brivio
2022-02-22 1:34 ` [PATCH 10/18] udp: Allow loopback connections from host using configured unicast address Stefano Brivio
2022-02-22 1:34 ` Stefano Brivio [this message]
2022-02-22 1:34 ` [PATCH 12/18] pasta: By default, quit if filesystem-bound net namespace goes away Stefano Brivio
2022-02-22 1:34 ` [PATCH 13/18] test/distro/ubuntu: Use DEBIAN_FRONTEND=noninteractive for apt on 22.04 Stefano Brivio
2022-02-22 1:34 ` [PATCH 14/18] test/perf/passt_udp: Drop threshold for 256B test Stefano Brivio
2022-02-22 1:34 ` [PATCH 15/18] man page: Update REPORTING BUGS section Stefano Brivio
2022-02-22 1:34 ` [PATCH 16/18] README, hooks: Build HTML man page on push, add a link Stefano Brivio
2022-02-22 1:34 ` [PATCH 17/18] contrib: Add patch for Podman integration Stefano Brivio
2022-02-22 1:34 ` [PATCH 18/18] test: Add demo for Podman with pasta Stefano Brivio
2022-02-22 9:07 ` [PATCH 00/18] slirp4netns, sandboxing, Podman integration, assorted fixes Stefano Brivio
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20220222013434.4116044-12-sbrivio@redhat.com \
--to=sbrivio@redhat.com \
--cc=passt-dev@passt.top \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
Code repositories for project(s) associated with this public inbox
https://passt.top/passt
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for IMAP folder(s).