From: Laurent Vivier <lvivier@redhat.com>
To: passt-dev@passt.top
Cc: David Gibson <david@gibson.dropbear.id.au>
Subject: Re: [RFC v4] tcp: Replace TCP buffer structure by an iovec array
Date: Tue, 26 Mar 2024 11:19:22 +0100 [thread overview]
Message-ID: <91c44be3-7643-407c-a58d-41476a721d59@redhat.com> (raw)
In-Reply-To: <20240321102655.2003763-1-lvivier@redhat.com>
[-- Attachment #1: Type: text/plain, Size: 2494 bytes --]
Hi,
I compared perf results between this patch and a patch changing tap_send_frames_passt() to:
/**
 * tap_send_frames_passt() - Send frames to the tap socket, one sendmsg() each
 * @c:			Execution context, c->fd_tap is the socket written to
 * @iov:		Array of @nframes * @bufs_per_frame buffers
 * @bufs_per_frame:	Number of consecutive buffers in @iov forming one frame
 * @nframes:		Number of frames described by @iov
 *
 * If @bufs_per_frame is greater than one, the first buffer of each frame is
 * treated as a 32-bit length header: it's overwritten here with the sum of the
 * lengths of the remaining buffers of that frame, in network byte order.
 *
 * Return: number of frames sent, that is, the index of the first frame for
 *	   which sendmsg() or the completion of a short send failed
 */
static size_t tap_send_frames_passt(const struct ctx *c,
				    const struct iovec *iov,
				    size_t bufs_per_frame, size_t nframes)
{
	struct msghdr mh = {
		.msg_iovlen = bufs_per_frame,
	};
	size_t buf_offset;
	unsigned int i;
	ssize_t sent;

	for (i = 0; i < nframes; i++) {
		const struct iovec *frame = &iov[i * bufs_per_frame];
		unsigned int j;

		if (bufs_per_frame > 1) {
			/* if we have more than 1 iovec, the first one is
			 * vnet_len
			 */
			uint32_t *p = frame[0].iov_base;
			uint32_t vnet_len = 0;

			for (j = 1; j < bufs_per_frame; j++)
				vnet_len += frame[j].iov_len;

			*p = htonl(vnet_len);
		}

		mh.msg_iov = (void *)frame;

		sent = sendmsg(c->fd_tap, &mh, MSG_NOSIGNAL | MSG_DONTWAIT);
		if (sent < 0)
			return i;

		/* Check for any partial frames due to short send */
		j = iov_skip_bytes(frame, bufs_per_frame, sent, &buf_offset);

		/* A short send can stop in the middle of a buffer (buf_offset
		 * is non-zero) or exactly between two buffers (buf_offset is
		 * zero, but j is not): in both cases buffers j and onwards
		 * still need to go out to complete the frame.
		 */
		if (j < bufs_per_frame && (buf_offset || j)) {
			if (write_remainder(c->fd_tap, &frame[j],
					    bufs_per_frame - j,
					    buf_offset) < 0) {
				err("tap: partial frame send: %s",
				    strerror(errno));
				return i;
			}
		}
	}

	return i;
}
And the result of 'perf record -e cache-misses' gives:
slow
83.95% passt.avx2 passt.avx2 [.] csum_avx2
4.39% passt.avx2 passt.avx2 [.] tap4_handler
2.37% passt.avx2 libc.so.6 [.] __printf_buffer
0.84% passt.avx2 passt.avx2 [.] udp_timer
fast
22.15% passt.avx2 passt.avx2 [.] csum_avx2
14.91% passt.avx2 passt.avx2 [.] udp_timer
7.60% passt.avx2 libc.so.6 [.] __printf_buffer
5.10% passt.avx2 passt.avx2 [.] ffsl
Thanks,
Laurent
[-- Attachment #2: 0001-tap-compute-vnet_len-inside-tap_send_frames_passt.patch --]
[-- Type: text/x-patch, Size: 8448 bytes --]
From d4b3e12132ceaf5484de215e9c84cbedcbbb8188 Mon Sep 17 00:00:00 2001
From: Laurent Vivier <lvivier@redhat.com>
Date: Tue, 19 Mar 2024 18:20:20 +0100
Subject: [PATCH] tap: compute vnet_len inside tap_send_frames_passt()
Signed-off-by: Laurent Vivier <lvivier@redhat.com>
---
tap.c | 49 +++++++++++++++++++++++++++++++++----------------
tcp.c | 39 ++++++++++-----------------------------
2 files changed, 43 insertions(+), 45 deletions(-)
diff --git a/tap.c b/tap.c
index 13e4da79d690..1096272b411a 100644
--- a/tap.c
+++ b/tap.c
@@ -74,7 +74,7 @@ static PACKET_POOL_NOINIT(pool_tap6, TAP_MSGS, pkt_buf);
*/
void tap_send_single(const struct ctx *c, const void *data, size_t len)
{
- uint32_t vnet_len = htonl(len);
+ uint32_t vnet_len;
struct iovec iov[2];
size_t iovcnt = 0;
@@ -365,34 +365,51 @@ static size_t tap_send_frames_passt(const struct ctx *c,
const struct iovec *iov,
size_t bufs_per_frame, size_t nframes)
{
- size_t nbufs = bufs_per_frame * nframes;
struct msghdr mh = {
- .msg_iov = (void *)iov,
- .msg_iovlen = nbufs,
+ .msg_iovlen = bufs_per_frame,
};
size_t buf_offset;
unsigned int i;
ssize_t sent;
- sent = sendmsg(c->fd_tap, &mh, MSG_NOSIGNAL | MSG_DONTWAIT);
- if (sent < 0)
- return 0;
+ for (i = 0; i < nframes; i++) {
+ unsigned int j;
+
+ if (bufs_per_frame > 1) {
+ /* if we have more than one iovec, the first one is
+ * vnet_len
+ */
+ uint32_t *p = iov[i * bufs_per_frame].iov_base;
+ uint32_t vnet_len = 0;
- /* Check for any partial frames due to short send */
- i = iov_skip_bytes(iov, nbufs, sent, &buf_offset);
+ for (j = 1; j < bufs_per_frame; j++)
+ vnet_len += iov[i * bufs_per_frame + j].iov_len;
+ vnet_len = htonl(vnet_len);
+
+ *p = vnet_len;
+ }
- if (i < nbufs && (buf_offset || (i % bufs_per_frame))) {
- /* Number of unsent or partially sent buffers for the frame */
- size_t rembufs = bufs_per_frame - (i % bufs_per_frame);
+ mh.msg_iov = (void *)&iov[i * bufs_per_frame];
- if (write_remainder(c->fd_tap, &iov[i], rembufs, buf_offset) < 0) {
- err("tap: partial frame send: %s", strerror(errno));
+ sent = sendmsg(c->fd_tap, &mh, MSG_NOSIGNAL | MSG_DONTWAIT);
+ if (sent < 0)
return i;
+
+ /* Check for any partial frames due to short send */
+ j = iov_skip_bytes(&iov[i * bufs_per_frame], bufs_per_frame, sent, &buf_offset);
+
+ if (buf_offset && j < bufs_per_frame) {
+ if (write_remainder(c->fd_tap, &iov[i * bufs_per_frame + j],
+ bufs_per_frame - j,
+ buf_offset) < 0) {
+ err("tap: partial frame send: %s",
+ strerror(errno));
+ return i;
+ }
}
- i += rembufs;
}
- return i / bufs_per_frame;
+ return i;
}
/**
diff --git a/tcp.c b/tcp.c
index cc705064f059..d147e2c41648 100644
--- a/tcp.c
+++ b/tcp.c
@@ -443,10 +443,11 @@ struct tcp_flags_t {
} __attribute__ ((packed, aligned(__alignof__(unsigned int))));
#endif
+static uint32_t vnet_len;
+
/* Ethernet header for IPv4 frames */
static struct ethhdr tcp4_eth_src;
-static uint32_t tcp4_payload_vnet_len[TCP_FRAMES_MEM];
/* IPv4 headers */
static struct iphdr tcp4_payload_ip[TCP_FRAMES_MEM];
/* TCP headers and data for IPv4 frames */
@@ -457,7 +458,6 @@ static_assert(MSS4 <= sizeof(tcp4_payload[0].data), "MSS4 is greater than 65516"
static struct tcp_buf_seq_update tcp4_seq_update[TCP_FRAMES_MEM];
static unsigned int tcp4_payload_used;
-static uint32_t tcp4_flags_vnet_len[TCP_FRAMES_MEM];
/* IPv4 headers for TCP option flags frames */
static struct iphdr tcp4_flags_ip[TCP_FRAMES_MEM];
/* TCP headers and option flags for IPv4 frames */
@@ -468,7 +468,6 @@ static unsigned int tcp4_flags_used;
/* Ethernet header for IPv6 frames */
static struct ethhdr tcp6_eth_src;
-static uint32_t tcp6_payload_vnet_len[TCP_FRAMES_MEM];
/* IPv6 headers */
static struct ipv6hdr tcp6_payload_ip[TCP_FRAMES_MEM];
/* TCP headers and data for IPv6 frames */
@@ -479,7 +478,6 @@ static_assert(MSS6 <= sizeof(tcp6_payload[0].data), "MSS6 is greater than 65516"
static struct tcp_buf_seq_update tcp6_seq_update[TCP_FRAMES_MEM];
static unsigned int tcp6_payload_used;
-static uint32_t tcp6_flags_vnet_len[TCP_FRAMES_MEM];
/* IPv6 headers for TCP option flags frames */
static struct ipv6hdr tcp6_flags_ip[TCP_FRAMES_MEM];
/* TCP headers and option flags for IPv6 frames */
@@ -944,9 +942,8 @@ static void tcp_sock4_iov_init(const struct ctx *c)
/* iovecs */
iov = tcp4_l2_iov[i];
- iov[TCP_IOV_TAP].iov_base = &tcp4_payload_vnet_len[i];
- iov[TCP_IOV_TAP].iov_len = c->mode == MODE_PASST ?
- sizeof(tcp4_payload_vnet_len[i]) : 0;
+ iov[TCP_IOV_TAP].iov_base = &vnet_len;
+ iov[TCP_IOV_TAP].iov_len = sizeof(vnet_len);
iov[TCP_IOV_ETH].iov_base = &tcp4_eth_src;
iov[TCP_IOV_ETH].iov_len = sizeof(tcp4_eth_src);
iov[TCP_IOV_IP].iov_base = &tcp4_payload_ip[i];
@@ -954,9 +951,8 @@ static void tcp_sock4_iov_init(const struct ctx *c)
iov[TCP_IOV_PAYLOAD].iov_base = &tcp4_payload[i];
iov = tcp4_l2_flags_iov[i];
- iov[TCP_IOV_TAP].iov_base = &tcp4_flags_vnet_len[i];
- iov[TCP_IOV_TAP].iov_len = c->mode == MODE_PASST ?
- sizeof(tcp4_flags_vnet_len[i]) : 0;
+ iov[TCP_IOV_TAP].iov_base = &vnet_len;
+ iov[TCP_IOV_TAP].iov_len = sizeof(vnet_len);
iov[TCP_IOV_ETH].iov_base = &tcp4_eth_src;
iov[TCP_IOV_ETH].iov_len = sizeof(tcp4_eth_src);
iov[TCP_IOV_IP].iov_base = &tcp4_flags_ip[i];
@@ -989,9 +985,8 @@ static void tcp_sock6_iov_init(const struct ctx *c)
/* iovecs */
iov = tcp6_l2_iov[i];
- iov[TCP_IOV_TAP].iov_base = &tcp6_payload_vnet_len[i];
- iov[TCP_IOV_TAP].iov_len = c->mode == MODE_PASST ?
- sizeof(tcp6_payload_vnet_len[i]) : 0;
+ iov[TCP_IOV_TAP].iov_base = &vnet_len;
+ iov[TCP_IOV_TAP].iov_len = sizeof(vnet_len);
iov[TCP_IOV_ETH].iov_base = &tcp6_eth_src;
iov[TCP_IOV_ETH].iov_len = sizeof(tcp6_eth_src);
iov[TCP_IOV_IP].iov_base = &tcp6_payload_ip[i];
@@ -999,9 +994,8 @@ static void tcp_sock6_iov_init(const struct ctx *c)
iov[TCP_IOV_PAYLOAD].iov_base = &tcp6_payload[i];
iov = tcp6_l2_flags_iov[i];
- iov[TCP_IOV_TAP].iov_base = &tcp6_flags_vnet_len[i];
- iov[TCP_IOV_TAP].iov_len = c->mode == MODE_PASST ?
- sizeof(tcp6_flags_vnet_len[i]) : 0;
+ iov[TCP_IOV_TAP].iov_base = &vnet_len;
+ iov[TCP_IOV_TAP].iov_len = sizeof(vnet_len);
iov[TCP_IOV_ETH].iov_base = &tcp6_eth_src;
iov[TCP_IOV_ETH].iov_len = sizeof(tcp6_eth_src);
iov[TCP_IOV_IP].iov_base = &tcp6_flags_ip[i];
@@ -1558,7 +1552,6 @@ static int tcp_send_flag(struct ctx *c, struct tcp_tap_conn *conn, int flags)
struct tcp_info tinfo = { 0 };
socklen_t sl = sizeof(tinfo);
int s = conn->sock;
- uint32_t vnet_len;
size_t optlen = 0;
struct tcphdr *th;
struct iovec *iov;
@@ -1587,10 +1580,8 @@ static int tcp_send_flag(struct ctx *c, struct tcp_tap_conn *conn, int flags)
if (CONN_V4(conn)) {
iov = tcp4_l2_flags_iov[tcp4_flags_used++];
- vnet_len = sizeof(struct ethhdr) + sizeof(struct iphdr);
} else {
iov = tcp6_l2_flags_iov[tcp6_flags_used++];
- vnet_len = sizeof(struct ethhdr) + sizeof(struct ipv6hdr);
}
payload = iov[TCP_IOV_PAYLOAD].iov_base;
@@ -1649,8 +1640,6 @@ static int tcp_send_flag(struct ctx *c, struct tcp_tap_conn *conn, int flags)
conn->seq_to_tap);
iov[TCP_IOV_PAYLOAD].iov_len = ip_len;
- *(uint32_t *)iov[TCP_IOV_TAP].iov_base = htonl(vnet_len + ip_len);
-
if (th->ack) {
if (SEQ_GE(conn->seq_ack_to_tap, conn->seq_from_tap))
conn_flag(c, conn, ~ACK_TO_TAP_DUE);
@@ -2150,10 +2139,6 @@ static void tcp_data_to_tap(const struct ctx *c, struct tcp_tap_conn *conn,
ip_len = tcp_l2_buf_fill_headers(c, conn, iov, plen, check,
seq);
iov[TCP_IOV_PAYLOAD].iov_len = ip_len;
- *(uint32_t *)iov[TCP_IOV_TAP].iov_base =
- htonl(sizeof(struct ethhdr) +
- sizeof(struct iphdr) +
- ip_len);
if (tcp4_payload_used > TCP_FRAMES_MEM - 1)
tcp_payload_flush(c);
} else if (CONN_V6(conn)) {
@@ -2163,10 +2148,6 @@ static void tcp_data_to_tap(const struct ctx *c, struct tcp_tap_conn *conn,
iov = tcp6_l2_iov[tcp6_payload_used++];
ip_len = tcp_l2_buf_fill_headers(c, conn, iov, plen, NULL, seq);
iov[TCP_IOV_PAYLOAD].iov_len = ip_len;
- *(uint32_t *)iov[TCP_IOV_TAP].iov_base =
- htonl(sizeof(struct ethhdr) +
- sizeof(struct ipv6hdr) +
- ip_len);
if (tcp6_payload_used > TCP_FRAMES_MEM - 1)
tcp_payload_flush(c);
}
--
@@ -443,10 +443,11 @@ struct tcp_flags_t {
} __attribute__ ((packed, aligned(__alignof__(unsigned int))));
#endif
+static uint32_t vnet_len;
+
/* Ethernet header for IPv4 frames */
static struct ethhdr tcp4_eth_src;
-static uint32_t tcp4_payload_vnet_len[TCP_FRAMES_MEM];
/* IPv4 headers */
static struct iphdr tcp4_payload_ip[TCP_FRAMES_MEM];
/* TCP headers and data for IPv4 frames */
@@ -457,7 +458,6 @@ static_assert(MSS4 <= sizeof(tcp4_payload[0].data), "MSS4 is greater than 65516"
static struct tcp_buf_seq_update tcp4_seq_update[TCP_FRAMES_MEM];
static unsigned int tcp4_payload_used;
-static uint32_t tcp4_flags_vnet_len[TCP_FRAMES_MEM];
/* IPv4 headers for TCP option flags frames */
static struct iphdr tcp4_flags_ip[TCP_FRAMES_MEM];
/* TCP headers and option flags for IPv4 frames */
@@ -468,7 +468,6 @@ static unsigned int tcp4_flags_used;
/* Ethernet header for IPv6 frames */
static struct ethhdr tcp6_eth_src;
-static uint32_t tcp6_payload_vnet_len[TCP_FRAMES_MEM];
/* IPv6 headers */
static struct ipv6hdr tcp6_payload_ip[TCP_FRAMES_MEM];
/* TCP headers and data for IPv6 frames */
@@ -479,7 +478,6 @@ static_assert(MSS6 <= sizeof(tcp6_payload[0].data), "MSS6 is greater than 65516"
static struct tcp_buf_seq_update tcp6_seq_update[TCP_FRAMES_MEM];
static unsigned int tcp6_payload_used;
-static uint32_t tcp6_flags_vnet_len[TCP_FRAMES_MEM];
/* IPv6 headers for TCP option flags frames */
static struct ipv6hdr tcp6_flags_ip[TCP_FRAMES_MEM];
/* TCP headers and option flags for IPv6 frames */
@@ -944,9 +942,8 @@ static void tcp_sock4_iov_init(const struct ctx *c)
/* iovecs */
iov = tcp4_l2_iov[i];
- iov[TCP_IOV_TAP].iov_base = &tcp4_payload_vnet_len[i];
- iov[TCP_IOV_TAP].iov_len = c->mode == MODE_PASST ?
- sizeof(tcp4_payload_vnet_len[i]) : 0;
+ iov[TCP_IOV_TAP].iov_base = &vnet_len;
+ iov[TCP_IOV_TAP].iov_len = sizeof(vnet_len);
iov[TCP_IOV_ETH].iov_base = &tcp4_eth_src;
iov[TCP_IOV_ETH].iov_len = sizeof(tcp4_eth_src);
iov[TCP_IOV_IP].iov_base = &tcp4_payload_ip[i];
@@ -954,9 +951,8 @@ static void tcp_sock4_iov_init(const struct ctx *c)
iov[TCP_IOV_PAYLOAD].iov_base = &tcp4_payload[i];
iov = tcp4_l2_flags_iov[i];
- iov[TCP_IOV_TAP].iov_base = &tcp4_flags_vnet_len[i];
- iov[TCP_IOV_TAP].iov_len = c->mode == MODE_PASST ?
- sizeof(tcp4_flags_vnet_len[i]) : 0;
+ iov[TCP_IOV_TAP].iov_base = &vnet_len;
+ iov[TCP_IOV_TAP].iov_len = sizeof(vnet_len);
iov[TCP_IOV_ETH].iov_base = &tcp4_eth_src;
iov[TCP_IOV_ETH].iov_len = sizeof(tcp4_eth_src);
iov[TCP_IOV_IP].iov_base = &tcp4_flags_ip[i];
@@ -989,9 +985,8 @@ static void tcp_sock6_iov_init(const struct ctx *c)
/* iovecs */
iov = tcp6_l2_iov[i];
- iov[TCP_IOV_TAP].iov_base = &tcp6_payload_vnet_len[i];
- iov[TCP_IOV_TAP].iov_len = c->mode == MODE_PASST ?
- sizeof(tcp6_payload_vnet_len[i]) : 0;
+ iov[TCP_IOV_TAP].iov_base = &vnet_len;
+ iov[TCP_IOV_TAP].iov_len = sizeof(vnet_len);
iov[TCP_IOV_ETH].iov_base = &tcp6_eth_src;
iov[TCP_IOV_ETH].iov_len = sizeof(tcp6_eth_src);
iov[TCP_IOV_IP].iov_base = &tcp6_payload_ip[i];
@@ -999,9 +994,8 @@ static void tcp_sock6_iov_init(const struct ctx *c)
iov[TCP_IOV_PAYLOAD].iov_base = &tcp6_payload[i];
iov = tcp6_l2_flags_iov[i];
- iov[TCP_IOV_TAP].iov_base = &tcp6_flags_vnet_len[i];
- iov[TCP_IOV_TAP].iov_len = c->mode == MODE_PASST ?
- sizeof(tcp6_flags_vnet_len[i]) : 0;
+ iov[TCP_IOV_TAP].iov_base = &vnet_len;
+ iov[TCP_IOV_TAP].iov_len = sizeof(vnet_len);
iov[TCP_IOV_ETH].iov_base = &tcp6_eth_src;
iov[TCP_IOV_ETH].iov_len = sizeof(tcp6_eth_src);
iov[TCP_IOV_IP].iov_base = &tcp6_flags_ip[i];
@@ -1558,7 +1552,6 @@ static int tcp_send_flag(struct ctx *c, struct tcp_tap_conn *conn, int flags)
struct tcp_info tinfo = { 0 };
socklen_t sl = sizeof(tinfo);
int s = conn->sock;
- uint32_t vnet_len;
size_t optlen = 0;
struct tcphdr *th;
struct iovec *iov;
@@ -1587,10 +1580,8 @@ static int tcp_send_flag(struct ctx *c, struct tcp_tap_conn *conn, int flags)
if (CONN_V4(conn)) {
iov = tcp4_l2_flags_iov[tcp4_flags_used++];
- vnet_len = sizeof(struct ethhdr) + sizeof(struct iphdr);
} else {
iov = tcp6_l2_flags_iov[tcp6_flags_used++];
- vnet_len = sizeof(struct ethhdr) + sizeof(struct ipv6hdr);
}
payload = iov[TCP_IOV_PAYLOAD].iov_base;
@@ -1649,8 +1640,6 @@ static int tcp_send_flag(struct ctx *c, struct tcp_tap_conn *conn, int flags)
conn->seq_to_tap);
iov[TCP_IOV_PAYLOAD].iov_len = ip_len;
- *(uint32_t *)iov[TCP_IOV_TAP].iov_base = htonl(vnet_len + ip_len);
-
if (th->ack) {
if (SEQ_GE(conn->seq_ack_to_tap, conn->seq_from_tap))
conn_flag(c, conn, ~ACK_TO_TAP_DUE);
@@ -2150,10 +2139,6 @@ static void tcp_data_to_tap(const struct ctx *c, struct tcp_tap_conn *conn,
ip_len = tcp_l2_buf_fill_headers(c, conn, iov, plen, check,
seq);
iov[TCP_IOV_PAYLOAD].iov_len = ip_len;
- *(uint32_t *)iov[TCP_IOV_TAP].iov_base =
- htonl(sizeof(struct ethhdr) +
- sizeof(struct iphdr) +
- ip_len);
if (tcp4_payload_used > TCP_FRAMES_MEM - 1)
tcp_payload_flush(c);
} else if (CONN_V6(conn)) {
@@ -2163,10 +2148,6 @@ static void tcp_data_to_tap(const struct ctx *c, struct tcp_tap_conn *conn,
iov = tcp6_l2_iov[tcp6_payload_used++];
ip_len = tcp_l2_buf_fill_headers(c, conn, iov, plen, NULL, seq);
iov[TCP_IOV_PAYLOAD].iov_len = ip_len;
- *(uint32_t *)iov[TCP_IOV_TAP].iov_base =
- htonl(sizeof(struct ethhdr) +
- sizeof(struct ipv6hdr) +
- ip_len);
if (tcp6_payload_used > TCP_FRAMES_MEM - 1)
tcp_payload_flush(c);
}
--
2.44.0
next prev parent reply other threads:[~2024-03-26 10:19 UTC|newest]
Thread overview: 5+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-03-21 10:26 [RFC v4] tcp: Replace TCP buffer structure by an iovec array Laurent Vivier
2024-03-22 2:40 ` David Gibson
2024-03-26 10:19 ` Laurent Vivier [this message]
2024-03-27 1:35 ` David Gibson
2024-04-12 17:59 ` Stefano Brivio
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=91c44be3-7643-407c-a58d-41476a721d59@redhat.com \
--to=lvivier@redhat.com \
--cc=david@gibson.dropbear.id.au \
--cc=passt-dev@passt.top \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
Code repositories for project(s) associated with this public inbox
https://passt.top/passt
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for IMAP folder(s).