On Tue, Nov 05, 2024 at 08:07:44PM -0500, Jon Maloy wrote: > In order to reduce static memory and code footprint, we merge > the array for l2 flag frames into the one for payload frames. > > This change also ensures that no flag message will be sent out > over the l2 media bypassing already queued payload messages. > > Performance measurements with iperf3, where we force all > traffic via the tap queue, show no significant difference: > > Dual traffic both directions sinmultaneously, with patch: > ======================================================== > host->ns: > -------- > [ ID] Interval Transfer Bitrate Retr > [ 5] 0.00-100.00 sec 36.3 GBytes 3.12 Gbits/sec 4759 sender > [ 5] 0.00-100.04 sec 36.3 GBytes 3.11 Gbits/sec receiver > > ns->host: > --------- > [ ID] Interval Transfer Bitrate > [ 5] 0.00-100.00 sec 321 GBytes 27.6 Gbits/sec receiver > > Dual traffic both directions sinmultaneously, without patch: > ============================================================ > host->ns: > -------- > [ ID] Interval Transfer Bitrate Retr > [ 5] 0.00-100.00 sec 35.0 GBytes 3.01 Gbits/sec 6001 sender > [ 5] 0.00-100.04 sec 34.8 GBytes 2.99 Gbits/sec receiver > > ns->host > -------- > [ ID] Interval Transfer Bitrate > [ 5] 0.00-100.00 sec 345 GBytes 29.6 Gbits/sec receiver > > Single connection, with patch: > ============================== > host->ns: > --------- > [ ID] Interval Transfer Bitrate Retr > [ 5] 0.00-100.00 sec 138 GBytes 11.8 Gbits/sec 922 sender > [ 5] 0.00-100.04 sec 138 GBytes 11.8 Gbits/sec receiver > > ns->host: > ----------- > [ ID] Interval Transfer Bitrate > [ 5] 0.00-100.00 sec 430 GBytes 36.9 Gbits/sec receiver > > Single connection, without patch: > ================================= > host->ns: > ------------ > [ ID] Interval Transfer Bitrate Retr > [ 5] 0.00-100.00 sec 139 GBytes 11.9 Gbits/sec 900 sender > [ 5] 0.00-100.04 sec 139 GBytes 11.9 Gbits/sec receiver > > ns->host: > --------- > [ ID] Interval Transfer Bitrate > [ 5] 0.00-100.00 sec 440 GBytes 37.8 Gbits/sec receiver > > Signed-off-by: Jon Maloy Reviewed-by: David Gibson > > ---- > v2: - Adapted to and rebased on latest release. > - Removed redundant tcp_flags_push() function. > - Added measurement results. > v3: - Rebased on latest release. > - Fixes based on comments from D. Gibson > --- > tcp.c | 1 - > tcp_buf.c | 70 ++++++++++++-------------------------------------- > tcp_buf.h | 1 - > tcp_internal.h | 15 ----------- > 4 files changed, 17 insertions(+), 70 deletions(-) > > diff --git a/tcp.c b/tcp.c > index 4e0a17e..b652900 100644 > --- a/tcp.c > +++ b/tcp.c > @@ -937,7 +937,6 @@ bool tcp_flow_defer(const struct tcp_tap_conn *conn) > /* cppcheck-suppress [constParameterPointer, unmatchedSuppression] */ > void tcp_defer_handler(struct ctx *c) > { > - tcp_flags_flush(c); > tcp_payload_flush(c); > } > > diff --git a/tcp_buf.c b/tcp_buf.c > index 274e313..d29c1a9 100644 > --- a/tcp_buf.c > +++ b/tcp_buf.c > @@ -20,7 +20,7 @@ > > #include > > -#include > +#include Oops, thought I'd changed that one already. > > #include "util.h" > #include "ip.h" > @@ -59,22 +59,10 @@ static_assert(MSS6 <= sizeof(tcp_payload[0].data), "MSS6 is greater than 65516") > static struct tcp_tap_conn *tcp_frame_conns[TCP_FRAMES_MEM]; > static unsigned int tcp_payload_used; > > -static struct tap_hdr tcp_flags_tap_hdr[TCP_FRAMES_MEM]; > -/* IPv4 headers for TCP segment without payload */ > -static struct iphdr tcp4_flags_ip[TCP_FRAMES_MEM]; > -/* TCP segments without payload for IPv4 frames */ > -static struct tcp_flags_t tcp_flags[TCP_FRAMES_MEM]; > - > -static unsigned int tcp_flags_used; > - > -/* IPv6 headers for TCP segment without payload */ > -static struct ipv6hdr tcp6_flags_ip[TCP_FRAMES_MEM]; > - > /* recvmsg()/sendmsg() data for tap */ > static struct iovec iov_sock [TCP_FRAMES_MEM + 1]; > > static struct iovec tcp_l2_iov[TCP_FRAMES_MEM][TCP_NUM_IOVS]; > -static struct iovec tcp_l2_flags_iov[TCP_FRAMES_MEM][TCP_NUM_IOVS]; > > /** > * tcp_update_l2_buf() - Update Ethernet header buffers with addresses > @@ -103,15 +91,6 @@ void tcp_sock_iov_init(const struct ctx *c) > for (i = 0; i < ARRAY_SIZE(tcp_payload); i++) { > tcp6_payload_ip[i] = ip6; > tcp4_payload_ip[i] = iph; > - tcp_payload[i].th.doff = sizeof(struct tcphdr) / 4; > - tcp_payload[i].th.ack = 1; > - } > - > - for (i = 0; i < ARRAY_SIZE(tcp_flags); i++) { > - tcp6_flags_ip[i] = ip6; > - tcp4_flags_ip[i] = iph; > - tcp_flags[i].th.doff = sizeof(struct tcphdr) / 4; > - tcp_flags[i].th.ack = 1; > } > > for (i = 0; i < TCP_FRAMES_MEM; i++) { > @@ -121,25 +100,6 @@ void tcp_sock_iov_init(const struct ctx *c) > iov[TCP_IOV_ETH].iov_len = sizeof(struct ethhdr); > iov[TCP_IOV_PAYLOAD].iov_base = &tcp_payload[i]; > } > - > - for (i = 0; i < TCP_FRAMES_MEM; i++) { > - struct iovec *iov = tcp_l2_flags_iov[i]; > - > - iov[TCP_IOV_TAP] = tap_hdr_iov(c, &tcp_flags_tap_hdr[i]); > - iov[TCP_IOV_ETH].iov_len = sizeof(struct ethhdr); > - iov[TCP_IOV_PAYLOAD].iov_base = &tcp_flags[i]; > - } > -} > - > -/** > - * tcp_flags_flush() - Send out buffers for segments with no data (flags) > - * @c: Execution context > - */ > -void tcp_flags_flush(const struct ctx *c) > -{ > - tap_send_frames(c, &tcp_l2_flags_iov[0][0], TCP_NUM_IOVS, > - tcp_flags_used); > - tcp_flags_used = 0; > } > > /** > @@ -171,7 +131,7 @@ static void tcp_revert_seq(const struct ctx *c, struct tcp_tap_conn **conns, > } > > /** > - * tcp_payload_flush() - Send out buffers for segments with data > + * tcp_payload_flush() - Send out buffers for segments with data or flags > * @c: Execution context > */ > void tcp_payload_flush(const struct ctx *c) > @@ -197,37 +157,35 @@ void tcp_payload_flush(const struct ctx *c) > */ > int tcp_buf_send_flag(const struct ctx *c, struct tcp_tap_conn *conn, int flags) > { > - struct tcp_flags_t *payload; > + struct tcp_payload_t *payload; > struct iovec *iov; > size_t optlen; > size_t l4len; > uint32_t seq; > int ret; > > - iov = tcp_l2_flags_iov[tcp_flags_used]; > + iov = tcp_l2_iov[tcp_payload_used]; > if (CONN_V4(conn)) { > - iov[TCP_IOV_IP] = IOV_OF_LVALUE(tcp4_flags_ip[tcp_flags_used]); > + iov[TCP_IOV_IP] = IOV_OF_LVALUE(tcp4_payload_ip[tcp_payload_used]); > iov[TCP_IOV_ETH].iov_base = &tcp4_eth_src; > } else { > - iov[TCP_IOV_IP] = IOV_OF_LVALUE(tcp6_flags_ip[tcp_flags_used]); > + iov[TCP_IOV_IP] = IOV_OF_LVALUE(tcp6_payload_ip[tcp_payload_used]); > iov[TCP_IOV_ETH].iov_base = &tcp6_eth_src; > } > > payload = iov[TCP_IOV_PAYLOAD].iov_base; > seq = conn->seq_to_tap; > ret = tcp_prepare_flags(c, conn, flags, &payload->th, > - &payload->opts, &optlen); > + (struct tcp_syn_opts *)&payload->data, &optlen); > if (ret <= 0) > return ret; > > - tcp_flags_used++; > + tcp_payload_used++; > l4len = tcp_l2_buf_fill_headers(conn, iov, optlen, NULL, seq, false); > iov[TCP_IOV_PAYLOAD].iov_len = l4len; > - > if (flags & DUP_ACK) { > - struct iovec *dup_iov; > + struct iovec *dup_iov = tcp_l2_iov[tcp_payload_used++]; > > - dup_iov = tcp_l2_flags_iov[tcp_flags_used++]; > memcpy(dup_iov[TCP_IOV_TAP].iov_base, iov[TCP_IOV_TAP].iov_base, > iov[TCP_IOV_TAP].iov_len); > dup_iov[TCP_IOV_ETH].iov_base = iov[TCP_IOV_ETH].iov_base; > @@ -237,8 +195,8 @@ int tcp_buf_send_flag(const struct ctx *c, struct tcp_tap_conn *conn, int flags) > dup_iov[TCP_IOV_PAYLOAD].iov_len = l4len; > } > > - if (tcp_flags_used > TCP_FRAMES_MEM - 2) > - tcp_flags_flush(c); > + if (tcp_payload_used > TCP_FRAMES_MEM - 2) > + tcp_payload_flush(c); > > return 0; > } > @@ -254,6 +212,7 @@ int tcp_buf_send_flag(const struct ctx *c, struct tcp_tap_conn *conn, int flags) > static void tcp_data_to_tap(const struct ctx *c, struct tcp_tap_conn *conn, > ssize_t dlen, int no_csum, uint32_t seq) > { > + struct tcp_payload_t *payload; > const uint16_t *check = NULL; > struct iovec *iov; > size_t l4len; > @@ -274,6 +233,11 @@ static void tcp_data_to_tap(const struct ctx *c, struct tcp_tap_conn *conn, > iov[TCP_IOV_IP] = IOV_OF_LVALUE(tcp6_payload_ip[tcp_payload_used]); > iov[TCP_IOV_ETH].iov_base = &tcp6_eth_src; > } > + payload = iov[TCP_IOV_PAYLOAD].iov_base; > + payload->th.th_off = sizeof(struct tcphdr) / 4; > + payload->th.th_x2 = 0; > + payload->th.th_flags = 0; > + payload->th.ack = 1; > l4len = tcp_l2_buf_fill_headers(conn, iov, dlen, check, seq, false); > iov[TCP_IOV_PAYLOAD].iov_len = l4len; > if (++tcp_payload_used > TCP_FRAMES_MEM - 1) > diff --git a/tcp_buf.h b/tcp_buf.h > index 49c04d4..54f5e53 100644 > --- a/tcp_buf.h > +++ b/tcp_buf.h > @@ -7,7 +7,6 @@ > #define TCP_BUF_H > > void tcp_sock_iov_init(const struct ctx *c); > -void tcp_flags_flush(const struct ctx *c); > void tcp_payload_flush(const struct ctx *c); > int tcp_buf_data_from_sock(const struct ctx *c, struct tcp_tap_conn *conn); > int tcp_buf_send_flag(const struct ctx *c, struct tcp_tap_conn *conn, int flags); > diff --git a/tcp_internal.h b/tcp_internal.h > index a5a47df..c846f60 100644 > --- a/tcp_internal.h > +++ b/tcp_internal.h > @@ -134,21 +134,6 @@ struct tcp_syn_opts { > .ws = TCP_OPT_WS(ws_), \ > }) > > -/** > - * struct tcp_flags_t - TCP header and data to send zero-length > - * segments (flags) > - * @th: TCP header > - * @opts TCP options > - */ > -struct tcp_flags_t { > - struct tcphdr th; > - struct tcp_syn_opts opts; > -#ifdef __AVX2__ > -} __attribute__ ((packed, aligned(32))); > -#else > -} __attribute__ ((packed, aligned(__alignof__(unsigned int)))); > -#endif > - > extern char tcp_buf_discard [MAX_WINDOW]; > > void conn_flag_do(const struct ctx *c, struct tcp_tap_conn *conn, -- David Gibson (he or they) | I'll have my music baroque, and my code david AT gibson.dropbear.id.au | minimalist, thank you, not the other way | around. http://www.ozlabs.org/~dgibson