public inbox for passt-dev@passt.top
 help / color / mirror / code / Atom feed
* [PATCH v2] tcp: unify payload and flags l2 frames array
@ 2024-11-02  0:51 Jon Maloy
  2024-11-04  1:16 ` David Gibson
  0 siblings, 1 reply; 3+ messages in thread
From: Jon Maloy @ 2024-11-02  0:51 UTC (permalink / raw)
  To: passt-dev, sbrivio, lvivier, dgibson, jmaloy

In order to reduce static memory and code footprint, we merge
the array for l2 flag frames into the one for payload frames.

This change also ensures that no flag message will be sent out
over the l2 media bypassing already queued payload messages.

Performance measurements with iperf3, where we force all
traffic via the tap queue, show no significant difference:

Dual traffic both directions sinmultaneously, with patch:
========================================================
host->ns:
--------
[ ID] Interval           Transfer     Bitrate         Retr
[  5]   0.00-100.00 sec  36.3 GBytes  3.12 Gbits/sec  4759             sender
[  5]   0.00-100.04 sec  36.3 GBytes  3.11 Gbits/sec                  receiver

ns->host:
---------
[ ID] Interval           Transfer     Bitrate
[  5]   0.00-100.00 sec   321 GBytes  27.6 Gbits/sec                  receiver

Dual traffic both directions sinmultaneously, without patch:
============================================================
host->ns:
--------
[ ID] Interval           Transfer     Bitrate         Retr
[  5]   0.00-100.00 sec  35.0 GBytes  3.01 Gbits/sec  6001             sender
[  5]   0.00-100.04 sec  34.8 GBytes  2.99 Gbits/sec                  receiver

ns->host
--------
[ ID] Interval           Transfer     Bitrate
[  5]   0.00-100.00 sec   345 GBytes  29.6 Gbits/sec                  receiver

Single connection, with patch:
==============================
host->ns:
---------
[ ID] Interval           Transfer     Bitrate         Retr
[  5]   0.00-100.00 sec   138 GBytes  11.8 Gbits/sec  922             sender
[  5]   0.00-100.04 sec   138 GBytes  11.8 Gbits/sec                  receiver

ns->host:
-----------
[ ID] Interval           Transfer     Bitrate
[  5]   0.00-100.00 sec   430 GBytes  36.9 Gbits/sec                  receiver

Single connection, without patch:
=================================
host->ns:
------------
[ ID] Interval           Transfer     Bitrate         Retr
[  5]   0.00-100.00 sec   139 GBytes  11.9 Gbits/sec  900             sender
[  5]   0.00-100.04 sec   139 GBytes  11.9 Gbits/sec                  receiver

ns->host:
---------
[ ID] Interval           Transfer     Bitrate
[  5]   0.00-100.00 sec   440 GBytes  37.8 Gbits/sec                  receiver

Signed-off-by: Jon Maloy <jmaloy@redhat.com>

----
v2: - Adapted to and rebased on latest release.
    - Removed redundant tcp_flags_push() function.
    - Added measurement results.

Signed-off-by: Jon Maloy <jmaloy@redhat.com>
---
 tcp.c          |  1 -
 tcp_buf.c      | 66 ++++++++++++--------------------------------------
 tcp_buf.h      |  1 -
 tcp_internal.h | 15 ------------
 4 files changed, 16 insertions(+), 67 deletions(-)

diff --git a/tcp.c b/tcp.c
index 10ad06a..b17d5fe 100644
--- a/tcp.c
+++ b/tcp.c
@@ -936,7 +936,6 @@ bool tcp_flow_defer(const struct tcp_tap_conn *conn)
 /* cppcheck-suppress [constParameterPointer, unmatchedSuppression] */
 void tcp_defer_handler(struct ctx *c)
 {
-	tcp_flags_flush(c);
 	tcp_payload_flush(c);
 }
 
diff --git a/tcp_buf.c b/tcp_buf.c
index 274e313..58df4cc 100644
--- a/tcp_buf.c
+++ b/tcp_buf.c
@@ -33,6 +33,7 @@
 #include "tcp_internal.h"
 #include "tcp_buf.h"
 
+#define PAYLOAD_FLAGS htons(0x5010) /* doff = 5, ack = 1 */
 #define TCP_FRAMES_MEM			128
 #define TCP_FRAMES							   \
 	(c->mode == MODE_PASTA ? 1 : TCP_FRAMES_MEM)
@@ -59,22 +60,10 @@ static_assert(MSS6 <= sizeof(tcp_payload[0].data), "MSS6 is greater than 65516")
 static struct tcp_tap_conn *tcp_frame_conns[TCP_FRAMES_MEM];
 static unsigned int tcp_payload_used;
 
-static struct tap_hdr		tcp_flags_tap_hdr[TCP_FRAMES_MEM];
-/* IPv4 headers for TCP segment without payload */
-static struct iphdr		tcp4_flags_ip[TCP_FRAMES_MEM];
-/* TCP segments without payload for IPv4 frames */
-static struct tcp_flags_t	tcp_flags[TCP_FRAMES_MEM];
-
-static unsigned int tcp_flags_used;
-
-/* IPv6 headers for TCP segment without payload */
-static struct ipv6hdr		tcp6_flags_ip[TCP_FRAMES_MEM];
-
 /* recvmsg()/sendmsg() data for tap */
 static struct iovec	iov_sock		[TCP_FRAMES_MEM + 1];
 
 static struct iovec	tcp_l2_iov[TCP_FRAMES_MEM][TCP_NUM_IOVS];
-static struct iovec	tcp_l2_flags_iov[TCP_FRAMES_MEM][TCP_NUM_IOVS];
 
 /**
  * tcp_update_l2_buf() - Update Ethernet header buffers with addresses
@@ -107,13 +96,6 @@ void tcp_sock_iov_init(const struct ctx *c)
 		tcp_payload[i].th.ack = 1;
 	}
 
-	for (i = 0; i < ARRAY_SIZE(tcp_flags); i++) {
-		tcp6_flags_ip[i] = ip6;
-		tcp4_flags_ip[i] = iph;
-		tcp_flags[i].th.doff = sizeof(struct tcphdr) / 4;
-		tcp_flags[i].th.ack = 1;
-	}
-
 	for (i = 0; i < TCP_FRAMES_MEM; i++) {
 		struct iovec *iov = tcp_l2_iov[i];
 
@@ -121,25 +103,6 @@ void tcp_sock_iov_init(const struct ctx *c)
 		iov[TCP_IOV_ETH].iov_len = sizeof(struct ethhdr);
 		iov[TCP_IOV_PAYLOAD].iov_base = &tcp_payload[i];
 	}
-
-	for (i = 0; i < TCP_FRAMES_MEM; i++) {
-		struct iovec *iov = tcp_l2_flags_iov[i];
-
-		iov[TCP_IOV_TAP] = tap_hdr_iov(c, &tcp_flags_tap_hdr[i]);
-		iov[TCP_IOV_ETH].iov_len = sizeof(struct ethhdr);
-		iov[TCP_IOV_PAYLOAD].iov_base = &tcp_flags[i];
-	}
-}
-
-/**
- * tcp_flags_flush() - Send out buffers for segments with no data (flags)
- * @c:		Execution context
- */
-void tcp_flags_flush(const struct ctx *c)
-{
-	tap_send_frames(c, &tcp_l2_flags_iov[0][0], TCP_NUM_IOVS,
-			tcp_flags_used);
-	tcp_flags_used = 0;
 }
 
 /**
@@ -171,7 +134,7 @@ static void tcp_revert_seq(const struct ctx *c, struct tcp_tap_conn **conns,
 }
 
 /**
- * tcp_payload_flush() - Send out buffers for segments with data
+ * tcp_payload_flush() - Send out buffers for segments with data or flags
  * @c:		Execution context
  */
 void tcp_payload_flush(const struct ctx *c)
@@ -197,37 +160,35 @@ void tcp_payload_flush(const struct ctx *c)
  */
 int tcp_buf_send_flag(const struct ctx *c, struct tcp_tap_conn *conn, int flags)
 {
-	struct tcp_flags_t *payload;
+	struct tcp_payload_t *payload;
 	struct iovec *iov;
 	size_t optlen;
 	size_t l4len;
 	uint32_t seq;
 	int ret;
 
-	iov = tcp_l2_flags_iov[tcp_flags_used];
+	iov = tcp_l2_iov[tcp_payload_used];
 	if (CONN_V4(conn)) {
-		iov[TCP_IOV_IP] = IOV_OF_LVALUE(tcp4_flags_ip[tcp_flags_used]);
+		iov[TCP_IOV_IP] = IOV_OF_LVALUE(tcp4_payload_ip[tcp_payload_used]);
 		iov[TCP_IOV_ETH].iov_base = &tcp4_eth_src;
 	} else {
-		iov[TCP_IOV_IP] = IOV_OF_LVALUE(tcp6_flags_ip[tcp_flags_used]);
+		iov[TCP_IOV_IP] = IOV_OF_LVALUE(tcp6_payload_ip[tcp_payload_used]);
 		iov[TCP_IOV_ETH].iov_base = &tcp6_eth_src;
 	}
 
 	payload = iov[TCP_IOV_PAYLOAD].iov_base;
 	seq = conn->seq_to_tap;
 	ret = tcp_prepare_flags(c, conn, flags, &payload->th,
-				&payload->opts, &optlen);
+				(struct tcp_syn_opts *)&payload->data, &optlen);
 	if (ret <= 0)
 		return ret;
 
-	tcp_flags_used++;
+	tcp_payload_used++;
 	l4len = tcp_l2_buf_fill_headers(conn, iov, optlen, NULL, seq, false);
 	iov[TCP_IOV_PAYLOAD].iov_len = l4len;
-
 	if (flags & DUP_ACK) {
-		struct iovec *dup_iov;
+		struct iovec *dup_iov = tcp_l2_iov[tcp_payload_used++];
 
-		dup_iov = tcp_l2_flags_iov[tcp_flags_used++];
 		memcpy(dup_iov[TCP_IOV_TAP].iov_base, iov[TCP_IOV_TAP].iov_base,
 		       iov[TCP_IOV_TAP].iov_len);
 		dup_iov[TCP_IOV_ETH].iov_base = iov[TCP_IOV_ETH].iov_base;
@@ -237,8 +198,8 @@ int tcp_buf_send_flag(const struct ctx *c, struct tcp_tap_conn *conn, int flags)
 		dup_iov[TCP_IOV_PAYLOAD].iov_len = l4len;
 	}
 
-	if (tcp_flags_used > TCP_FRAMES_MEM - 2)
-		tcp_flags_flush(c);
+	if (tcp_payload_used > TCP_FRAMES_MEM - 2)
+		tcp_payload_flush(c);
 
 	return 0;
 }
@@ -254,8 +215,10 @@ int tcp_buf_send_flag(const struct ctx *c, struct tcp_tap_conn *conn, int flags)
 static void tcp_data_to_tap(const struct ctx *c, struct tcp_tap_conn *conn,
 			    ssize_t dlen, int no_csum, uint32_t seq)
 {
+	struct tcp_payload_t *payload;
 	const uint16_t *check = NULL;
 	struct iovec *iov;
+	uint16_t *flags;
 	size_t l4len;
 
 	conn->seq_to_tap = seq + dlen;
@@ -274,6 +237,9 @@ static void tcp_data_to_tap(const struct ctx *c, struct tcp_tap_conn *conn,
 		iov[TCP_IOV_IP] = IOV_OF_LVALUE(tcp6_payload_ip[tcp_payload_used]);
 		iov[TCP_IOV_ETH].iov_base = &tcp6_eth_src;
 	}
+	payload = iov[TCP_IOV_PAYLOAD].iov_base;
+	flags = &payload->th.window - 1;
+	*(flags) = PAYLOAD_FLAGS;
 	l4len = tcp_l2_buf_fill_headers(conn, iov, dlen, check, seq, false);
 	iov[TCP_IOV_PAYLOAD].iov_len = l4len;
 	if (++tcp_payload_used > TCP_FRAMES_MEM - 1)
diff --git a/tcp_buf.h b/tcp_buf.h
index 49c04d4..54f5e53 100644
--- a/tcp_buf.h
+++ b/tcp_buf.h
@@ -7,7 +7,6 @@
 #define TCP_BUF_H
 
 void tcp_sock_iov_init(const struct ctx *c);
-void tcp_flags_flush(const struct ctx *c);
 void tcp_payload_flush(const struct ctx *c);
 int tcp_buf_data_from_sock(const struct ctx *c, struct tcp_tap_conn *conn);
 int tcp_buf_send_flag(const struct ctx *c, struct tcp_tap_conn *conn, int flags);
diff --git a/tcp_internal.h b/tcp_internal.h
index a5a47df..c846f60 100644
--- a/tcp_internal.h
+++ b/tcp_internal.h
@@ -134,21 +134,6 @@ struct tcp_syn_opts {
 		.ws = TCP_OPT_WS(ws_),			\
 	})
 
-/**
- * struct tcp_flags_t - TCP header and data to send zero-length
- *                      segments (flags)
- * @th:		TCP header
- * @opts	TCP options
- */
-struct tcp_flags_t {
-	struct tcphdr th;
-	struct tcp_syn_opts opts;
-#ifdef __AVX2__
-} __attribute__ ((packed, aligned(32)));
-#else
-} __attribute__ ((packed, aligned(__alignof__(unsigned int))));
-#endif
-
 extern char tcp_buf_discard [MAX_WINDOW];
 
 void conn_flag_do(const struct ctx *c, struct tcp_tap_conn *conn,
-- 
@@ -134,21 +134,6 @@ struct tcp_syn_opts {
 		.ws = TCP_OPT_WS(ws_),			\
 	})
 
-/**
- * struct tcp_flags_t - TCP header and data to send zero-length
- *                      segments (flags)
- * @th:		TCP header
- * @opts	TCP options
- */
-struct tcp_flags_t {
-	struct tcphdr th;
-	struct tcp_syn_opts opts;
-#ifdef __AVX2__
-} __attribute__ ((packed, aligned(32)));
-#else
-} __attribute__ ((packed, aligned(__alignof__(unsigned int))));
-#endif
-
 extern char tcp_buf_discard [MAX_WINDOW];
 
 void conn_flag_do(const struct ctx *c, struct tcp_tap_conn *conn,
-- 
2.45.2


^ permalink raw reply related	[flat|nested] 3+ messages in thread

* Re: [PATCH v2] tcp: unify payload and flags l2 frames array
  2024-11-02  0:51 [PATCH v2] tcp: unify payload and flags l2 frames array Jon Maloy
@ 2024-11-04  1:16 ` David Gibson
  2024-11-04  2:29   ` David Gibson
  0 siblings, 1 reply; 3+ messages in thread
From: David Gibson @ 2024-11-04  1:16 UTC (permalink / raw)
  To: Jon Maloy; +Cc: passt-dev, sbrivio, lvivier, dgibson

[-- Attachment #1: Type: text/plain, Size: 11471 bytes --]

On Fri, Nov 01, 2024 at 08:51:32PM -0400, Jon Maloy wrote:
> In order to reduce static memory and code footprint, we merge
> the array for l2 flag frames into the one for payload frames.
> 
> This change also ensures that no flag message will be sent out
> over the l2 media bypassing already queued payload messages.
> 
> Performance measurements with iperf3, where we force all
> traffic via the tap queue, show no significant difference:
> 
> Dual traffic both directions sinmultaneously, with patch:
> ========================================================
> host->ns:
> --------
> [ ID] Interval           Transfer     Bitrate         Retr
> [  5]   0.00-100.00 sec  36.3 GBytes  3.12 Gbits/sec  4759             sender
> [  5]   0.00-100.04 sec  36.3 GBytes  3.11 Gbits/sec                  receiver
> 
> ns->host:
> ---------
> [ ID] Interval           Transfer     Bitrate
> [  5]   0.00-100.00 sec   321 GBytes  27.6 Gbits/sec                  receiver

Not really anything to do with this patch, but it's mildly concerning
that the throughput is different by an order of magnitude between the
two directions.  We should probably look into that, in our copious
free time :/.


> Dual traffic both directions sinmultaneously, without patch:
> ============================================================
> host->ns:
> --------
> [ ID] Interval           Transfer     Bitrate         Retr
> [  5]   0.00-100.00 sec  35.0 GBytes  3.01 Gbits/sec  6001             sender
> [  5]   0.00-100.04 sec  34.8 GBytes  2.99 Gbits/sec                  receiver
> 
> ns->host
> --------
> [ ID] Interval           Transfer     Bitrate
> [  5]   0.00-100.00 sec   345 GBytes  29.6 Gbits/sec                  receiver
> 
> Single connection, with patch:
> ==============================
> host->ns:
> ---------
> [ ID] Interval           Transfer     Bitrate         Retr
> [  5]   0.00-100.00 sec   138 GBytes  11.8 Gbits/sec  922             sender
> [  5]   0.00-100.04 sec   138 GBytes  11.8 Gbits/sec                  receiver
> 
> ns->host:
> -----------
> [ ID] Interval           Transfer     Bitrate
> [  5]   0.00-100.00 sec   430 GBytes  36.9 Gbits/sec                  receiver
> 
> Single connection, without patch:
> =================================
> host->ns:
> ------------
> [ ID] Interval           Transfer     Bitrate         Retr
> [  5]   0.00-100.00 sec   139 GBytes  11.9 Gbits/sec  900             sender
> [  5]   0.00-100.04 sec   139 GBytes  11.9 Gbits/sec                  receiver
> 
> ns->host:
> ---------
> [ ID] Interval           Transfer     Bitrate
> [  5]   0.00-100.00 sec   440 GBytes  37.8 Gbits/sec                  receiver
> 
> Signed-off-by: Jon Maloy <jmaloy@redhat.com>
> 
> ----
> v2: - Adapted to and rebased on latest release.
>     - Removed redundant tcp_flags_push() function.
>     - Added measurement results.
> 
> Signed-off-by: Jon Maloy <jmaloy@redhat.com>

Generally LGTM.  One problem, which is pretty simple, but I think
really does need to be fixed.  See below.

> ---
>  tcp.c          |  1 -
>  tcp_buf.c      | 66 ++++++++++++--------------------------------------
>  tcp_buf.h      |  1 -
>  tcp_internal.h | 15 ------------
>  4 files changed, 16 insertions(+), 67 deletions(-)
> 
> diff --git a/tcp.c b/tcp.c
> index 10ad06a..b17d5fe 100644
> --- a/tcp.c
> +++ b/tcp.c
> @@ -936,7 +936,6 @@ bool tcp_flow_defer(const struct tcp_tap_conn *conn)
>  /* cppcheck-suppress [constParameterPointer, unmatchedSuppression] */
>  void tcp_defer_handler(struct ctx *c)
>  {
> -	tcp_flags_flush(c);
>  	tcp_payload_flush(c);
>  }
>  
> diff --git a/tcp_buf.c b/tcp_buf.c
> index 274e313..58df4cc 100644
> --- a/tcp_buf.c
> +++ b/tcp_buf.c
> @@ -33,6 +33,7 @@
>  #include "tcp_internal.h"
>  #include "tcp_buf.h"
>  
> +#define PAYLOAD_FLAGS htons(0x5010) /* doff = 5, ack = 1 */
>  #define TCP_FRAMES_MEM			128
>  #define TCP_FRAMES							   \
>  	(c->mode == MODE_PASTA ? 1 : TCP_FRAMES_MEM)
> @@ -59,22 +60,10 @@ static_assert(MSS6 <= sizeof(tcp_payload[0].data), "MSS6 is greater than 65516")
>  static struct tcp_tap_conn *tcp_frame_conns[TCP_FRAMES_MEM];
>  static unsigned int tcp_payload_used;
>  
> -static struct tap_hdr		tcp_flags_tap_hdr[TCP_FRAMES_MEM];
> -/* IPv4 headers for TCP segment without payload */
> -static struct iphdr		tcp4_flags_ip[TCP_FRAMES_MEM];
> -/* TCP segments without payload for IPv4 frames */
> -static struct tcp_flags_t	tcp_flags[TCP_FRAMES_MEM];
> -
> -static unsigned int tcp_flags_used;
> -
> -/* IPv6 headers for TCP segment without payload */
> -static struct ipv6hdr		tcp6_flags_ip[TCP_FRAMES_MEM];
> -
>  /* recvmsg()/sendmsg() data for tap */
>  static struct iovec	iov_sock		[TCP_FRAMES_MEM + 1];
>  
>  static struct iovec	tcp_l2_iov[TCP_FRAMES_MEM][TCP_NUM_IOVS];
> -static struct iovec	tcp_l2_flags_iov[TCP_FRAMES_MEM][TCP_NUM_IOVS];
>  
>  /**
>   * tcp_update_l2_buf() - Update Ethernet header buffers with addresses
> @@ -107,13 +96,6 @@ void tcp_sock_iov_init(const struct ctx *c)
>  		tcp_payload[i].th.ack = 1;
>  	}
>  
> -	for (i = 0; i < ARRAY_SIZE(tcp_flags); i++) {
> -		tcp6_flags_ip[i] = ip6;
> -		tcp4_flags_ip[i] = iph;
> -		tcp_flags[i].th.doff = sizeof(struct tcphdr) / 4;
> -		tcp_flags[i].th.ack = 1;
> -	}
> -
>  	for (i = 0; i < TCP_FRAMES_MEM; i++) {
>  		struct iovec *iov = tcp_l2_iov[i];
>  
> @@ -121,25 +103,6 @@ void tcp_sock_iov_init(const struct ctx *c)
>  		iov[TCP_IOV_ETH].iov_len = sizeof(struct ethhdr);
>  		iov[TCP_IOV_PAYLOAD].iov_base = &tcp_payload[i];
>  	}
> -
> -	for (i = 0; i < TCP_FRAMES_MEM; i++) {
> -		struct iovec *iov = tcp_l2_flags_iov[i];
> -
> -		iov[TCP_IOV_TAP] = tap_hdr_iov(c, &tcp_flags_tap_hdr[i]);
> -		iov[TCP_IOV_ETH].iov_len = sizeof(struct ethhdr);
> -		iov[TCP_IOV_PAYLOAD].iov_base = &tcp_flags[i];
> -	}
> -}
> -
> -/**
> - * tcp_flags_flush() - Send out buffers for segments with no data (flags)
> - * @c:		Execution context
> - */
> -void tcp_flags_flush(const struct ctx *c)
> -{
> -	tap_send_frames(c, &tcp_l2_flags_iov[0][0], TCP_NUM_IOVS,
> -			tcp_flags_used);
> -	tcp_flags_used = 0;
>  }
>  
>  /**
> @@ -171,7 +134,7 @@ static void tcp_revert_seq(const struct ctx *c, struct tcp_tap_conn **conns,
>  }
>  
>  /**
> - * tcp_payload_flush() - Send out buffers for segments with data
> + * tcp_payload_flush() - Send out buffers for segments with data or flags
>   * @c:		Execution context
>   */
>  void tcp_payload_flush(const struct ctx *c)
> @@ -197,37 +160,35 @@ void tcp_payload_flush(const struct ctx *c)
>   */
>  int tcp_buf_send_flag(const struct ctx *c, struct tcp_tap_conn *conn, int flags)
>  {
> -	struct tcp_flags_t *payload;
> +	struct tcp_payload_t *payload;
>  	struct iovec *iov;
>  	size_t optlen;
>  	size_t l4len;
>  	uint32_t seq;
>  	int ret;
>  
> -	iov = tcp_l2_flags_iov[tcp_flags_used];
> +	iov = tcp_l2_iov[tcp_payload_used];
>  	if (CONN_V4(conn)) {
> -		iov[TCP_IOV_IP] = IOV_OF_LVALUE(tcp4_flags_ip[tcp_flags_used]);
> +		iov[TCP_IOV_IP] = IOV_OF_LVALUE(tcp4_payload_ip[tcp_payload_used]);
>  		iov[TCP_IOV_ETH].iov_base = &tcp4_eth_src;
>  	} else {
> -		iov[TCP_IOV_IP] = IOV_OF_LVALUE(tcp6_flags_ip[tcp_flags_used]);
> +		iov[TCP_IOV_IP] = IOV_OF_LVALUE(tcp6_payload_ip[tcp_payload_used]);
>  		iov[TCP_IOV_ETH].iov_base = &tcp6_eth_src;
>  	}
>  
>  	payload = iov[TCP_IOV_PAYLOAD].iov_base;
>  	seq = conn->seq_to_tap;
>  	ret = tcp_prepare_flags(c, conn, flags, &payload->th,
> -				&payload->opts, &optlen);
> +				(struct tcp_syn_opts *)&payload->data, &optlen);
>  	if (ret <= 0)
>  		return ret;
>  
> -	tcp_flags_used++;
> +	tcp_payload_used++;
>  	l4len = tcp_l2_buf_fill_headers(conn, iov, optlen, NULL, seq, false);
>  	iov[TCP_IOV_PAYLOAD].iov_len = l4len;
> -
>  	if (flags & DUP_ACK) {
> -		struct iovec *dup_iov;
> +		struct iovec *dup_iov = tcp_l2_iov[tcp_payload_used++];
>  
> -		dup_iov = tcp_l2_flags_iov[tcp_flags_used++];
>  		memcpy(dup_iov[TCP_IOV_TAP].iov_base, iov[TCP_IOV_TAP].iov_base,
>  		       iov[TCP_IOV_TAP].iov_len);
>  		dup_iov[TCP_IOV_ETH].iov_base = iov[TCP_IOV_ETH].iov_base;
> @@ -237,8 +198,8 @@ int tcp_buf_send_flag(const struct ctx *c, struct tcp_tap_conn *conn, int flags)
>  		dup_iov[TCP_IOV_PAYLOAD].iov_len = l4len;
>  	}
>  
> -	if (tcp_flags_used > TCP_FRAMES_MEM - 2)
> -		tcp_flags_flush(c);
> +	if (tcp_payload_used > TCP_FRAMES_MEM - 2)
> +		tcp_payload_flush(c);
>  
>  	return 0;
>  }
> @@ -254,8 +215,10 @@ int tcp_buf_send_flag(const struct ctx *c, struct tcp_tap_conn *conn, int flags)
>  static void tcp_data_to_tap(const struct ctx *c, struct tcp_tap_conn *conn,
>  			    ssize_t dlen, int no_csum, uint32_t seq)
>  {
> +	struct tcp_payload_t *payload;
>  	const uint16_t *check = NULL;
>  	struct iovec *iov;
> +	uint16_t *flags;
>  	size_t l4len;
>  
>  	conn->seq_to_tap = seq + dlen;
> @@ -274,6 +237,9 @@ static void tcp_data_to_tap(const struct ctx *c, struct tcp_tap_conn *conn,
>  		iov[TCP_IOV_IP] = IOV_OF_LVALUE(tcp6_payload_ip[tcp_payload_used]);
>  		iov[TCP_IOV_ETH].iov_base = &tcp6_eth_src;
>  	}
> +	payload = iov[TCP_IOV_PAYLOAD].iov_base;
> +	flags = &payload->th.window - 1;
> +	*(flags) = PAYLOAD_FLAGS;

I think this is likely to run afoul of TBAA rules, which could cause
miscompiles because cc assumes *flags is not aliased with payload->th.
Although it's more bulky, I think it's better to explicitly assign
each of the fields in struct tcphdr.  The compiler should be able to
boil it down to the same instructions in any case.

Note that now we're using the netinet version of struct tcphdr, it has
an anonymous union so you can use th_flags, rather than the one bit
fields (doesn't include doffset, though).

>  	l4len = tcp_l2_buf_fill_headers(conn, iov, dlen, check, seq, false);
>  	iov[TCP_IOV_PAYLOAD].iov_len = l4len;
>  	if (++tcp_payload_used > TCP_FRAMES_MEM - 1)
> diff --git a/tcp_buf.h b/tcp_buf.h
> index 49c04d4..54f5e53 100644
> --- a/tcp_buf.h
> +++ b/tcp_buf.h
> @@ -7,7 +7,6 @@
>  #define TCP_BUF_H
>  
>  void tcp_sock_iov_init(const struct ctx *c);
> -void tcp_flags_flush(const struct ctx *c);
>  void tcp_payload_flush(const struct ctx *c);
>  int tcp_buf_data_from_sock(const struct ctx *c, struct tcp_tap_conn *conn);
>  int tcp_buf_send_flag(const struct ctx *c, struct tcp_tap_conn *conn, int flags);
> diff --git a/tcp_internal.h b/tcp_internal.h
> index a5a47df..c846f60 100644
> --- a/tcp_internal.h
> +++ b/tcp_internal.h
> @@ -134,21 +134,6 @@ struct tcp_syn_opts {
>  		.ws = TCP_OPT_WS(ws_),			\
>  	})
>  
> -/**
> - * struct tcp_flags_t - TCP header and data to send zero-length
> - *                      segments (flags)
> - * @th:		TCP header
> - * @opts	TCP options
> - */
> -struct tcp_flags_t {
> -	struct tcphdr th;
> -	struct tcp_syn_opts opts;
> -#ifdef __AVX2__
> -} __attribute__ ((packed, aligned(32)));
> -#else
> -} __attribute__ ((packed, aligned(__alignof__(unsigned int))));
> -#endif
> -
>  extern char tcp_buf_discard [MAX_WINDOW];
>  
>  void conn_flag_do(const struct ctx *c, struct tcp_tap_conn *conn,

-- 
David Gibson (he or they)	| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you, not the other way
				| around.
http://www.ozlabs.org/~dgibson

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [PATCH v2] tcp: unify payload and flags l2 frames array
  2024-11-04  1:16 ` David Gibson
@ 2024-11-04  2:29   ` David Gibson
  0 siblings, 0 replies; 3+ messages in thread
From: David Gibson @ 2024-11-04  2:29 UTC (permalink / raw)
  To: Jon Maloy; +Cc: passt-dev, sbrivio, lvivier, dgibson

[-- Attachment #1: Type: text/plain, Size: 2764 bytes --]

On Mon, Nov 04, 2024 at 12:16:58PM +1100, David Gibson wrote:
> On Fri, Nov 01, 2024 at 08:51:32PM -0400, Jon Maloy wrote:
> > In order to reduce static memory and code footprint, we merge
> > the array for l2 flag frames into the one for payload frames.
> > 
> > This change also ensures that no flag message will be sent out
> > over the l2 media bypassing already queued payload messages.
> > 
> > Performance measurements with iperf3, where we force all
> > traffic via the tap queue, show no significant difference:

Spotted a couple of extra things I missed in my previous mail.

[snip]
> > @@ -107,13 +96,6 @@ void tcp_sock_iov_init(const struct ctx *c)
> >  		tcp_payload[i].th.ack = 1;
> >  	}
> >  
> > -	for (i = 0; i < ARRAY_SIZE(tcp_flags); i++) {
> > -		tcp6_flags_ip[i] = ip6;
> > -		tcp4_flags_ip[i] = iph;
> > -		tcp_flags[i].th.doff = sizeof(struct tcphdr) / 4;
> > -		tcp_flags[i].th.ack = 1;
> > -	}
> > -
> >  	for (i = 0; i < TCP_FRAMES_MEM; i++) {
> >  		struct iovec *iov = tcp_l2_iov[i];
> >

Because you're now setting all the flags fields for every frame, you
should remove the initialisation of th.ack from iov_init.

[snip]
> > @@ -274,6 +237,9 @@ static void tcp_data_to_tap(const struct ctx *c, struct tcp_tap_conn *conn,
> >  		iov[TCP_IOV_IP] = IOV_OF_LVALUE(tcp6_payload_ip[tcp_payload_used]);
> >  		iov[TCP_IOV_ETH].iov_base = &tcp6_eth_src;
> >  	}
> > +	payload = iov[TCP_IOV_PAYLOAD].iov_base;
> > +	flags = &payload->th.window - 1;
> > +	*(flags) = PAYLOAD_FLAGS;
> 
> I think this is likely to run afoul of TBAA rules, which could cause
> miscompiles because cc assumes *flags is not aliased with payload->th.
> Although it's more bulky, I think it's better to explicitly assign
> each of the fields in struct tcphdr.  The compiler should be able to
> boil it down to the same instructions in any case.
> 
> Note that now we're using the netinet version of struct tcphdr, it has
> an anonymous union so you can use th_flags, rather than the one bit
> fields (doesn't include doffset, though).

Actually, looking at this again, you shouldn't need to update doff in
any case - it's the same for flag frames and data frames.  Of course,
it's plausible there's no point pre-filling the doff (or any other)
header fields, but I don't think changing that should be in scope for
this patch.

I hope to post this afternoon a bunch of different buffer cleanups,
along with vhost rebased on those and other changes.  Unfortunately
those will conflict with this patch, sorry.

-- 
David Gibson (he or they)	| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you, not the other way
				| around.
http://www.ozlabs.org/~dgibson

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2024-11-04  2:30 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2024-11-02  0:51 [PATCH v2] tcp: unify payload and flags l2 frames array Jon Maloy
2024-11-04  1:16 ` David Gibson
2024-11-04  2:29   ` David Gibson

Code repositories for project(s) associated with this public inbox

	https://passt.top/passt

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for IMAP folder(s).