public inbox for passt-dev@passt.top
 help / color / mirror / code / Atom feed
From: David Gibson <david@gibson.dropbear.id.au>
To: Jon Maloy <jmaloy@redhat.com>
Cc: passt-dev@passt.top, sbrivio@redhat.com, lvivier@redhat.com,
	dgibson@redhat.com
Subject: Re: [PATCH v3] tcp: unify payload and flags l2 frames array
Date: Wed, 6 Nov 2024 12:47:47 +1100	[thread overview]
Message-ID: <ZyrKw18qALw2Q0jN@zatzit> (raw)
In-Reply-To: <20241106010744.2082849-1-jmaloy@redhat.com>

[-- Attachment #1: Type: text/plain, Size: 10713 bytes --]

On Tue, Nov 05, 2024 at 08:07:44PM -0500, Jon Maloy wrote:
> In order to reduce static memory and code footprint, we merge
> the array for l2 flag frames into the one for payload frames.
> 
> This change also ensures that no flag message will be sent out
> over the l2 media bypassing already queued payload messages.
> 
> Performance measurements with iperf3, where we force all
> traffic via the tap queue, show no significant difference:
> 
> Dual traffic both directions sinmultaneously, with patch:
> ========================================================
> host->ns:
> --------
> [ ID] Interval           Transfer     Bitrate         Retr
> [  5]   0.00-100.00 sec  36.3 GBytes  3.12 Gbits/sec  4759       sender
> [  5]   0.00-100.04 sec  36.3 GBytes  3.11 Gbits/sec             receiver
> 
> ns->host:
> ---------
> [ ID] Interval           Transfer     Bitrate
> [  5]   0.00-100.00 sec   321 GBytes  27.6 Gbits/sec            receiver
> 
> Dual traffic both directions sinmultaneously, without patch:
> ============================================================
> host->ns:
> --------
> [ ID] Interval           Transfer     Bitrate         Retr
> [  5]   0.00-100.00 sec  35.0 GBytes  3.01 Gbits/sec  6001       sender
> [  5]   0.00-100.04 sec  34.8 GBytes  2.99 Gbits/sec            receiver
> 
> ns->host
> --------
> [ ID] Interval           Transfer     Bitrate
> [  5]   0.00-100.00 sec   345 GBytes  29.6 Gbits/sec            receiver
> 
> Single connection, with patch:
> ==============================
> host->ns:
> ---------
> [ ID] Interval           Transfer     Bitrate         Retr
> [  5]   0.00-100.00 sec   138 GBytes  11.8 Gbits/sec  922       sender
> [  5]   0.00-100.04 sec   138 GBytes  11.8 Gbits/sec            receiver
> 
> ns->host:
> -----------
> [ ID] Interval           Transfer     Bitrate
> [  5]   0.00-100.00 sec   430 GBytes  36.9 Gbits/sec            receiver
> 
> Single connection, without patch:
> =================================
> host->ns:
> ------------
> [ ID] Interval           Transfer     Bitrate         Retr
> [  5]   0.00-100.00 sec   139 GBytes  11.9 Gbits/sec  900       sender
> [  5]   0.00-100.04 sec   139 GBytes  11.9 Gbits/sec            receiver
> 
> ns->host:
> ---------
> [ ID] Interval           Transfer     Bitrate
> [  5]   0.00-100.00 sec   440 GBytes  37.8 Gbits/sec            receiver
> 
> Signed-off-by: Jon Maloy <jmaloy@redhat.com>

Reviewed-by: David Gibson <david@gibson.dropbear.id.au>

> 
> ----
> v2: - Adapted to and rebased on latest release.
>     - Removed redundant tcp_flags_push() function.
>     - Added measurement results.
> v3: - Rebased on latest release.
>     - Fixes based on comments from D. Gibson
> ---
>  tcp.c          |  1 -
>  tcp_buf.c      | 70 ++++++++++++--------------------------------------
>  tcp_buf.h      |  1 -
>  tcp_internal.h | 15 -----------
>  4 files changed, 17 insertions(+), 70 deletions(-)
> 
> diff --git a/tcp.c b/tcp.c
> index 4e0a17e..b652900 100644
> --- a/tcp.c
> +++ b/tcp.c
> @@ -937,7 +937,6 @@ bool tcp_flow_defer(const struct tcp_tap_conn *conn)
>  /* cppcheck-suppress [constParameterPointer, unmatchedSuppression] */
>  void tcp_defer_handler(struct ctx *c)
>  {
> -	tcp_flags_flush(c);
>  	tcp_payload_flush(c);
>  }
>  
> diff --git a/tcp_buf.c b/tcp_buf.c
> index 274e313..d29c1a9 100644
> --- a/tcp_buf.c
> +++ b/tcp_buf.c
> @@ -20,7 +20,7 @@
>  
>  #include <netinet/ip.h>
>  
> -#include <linux/tcp.h>
> +#include <netinet/tcp.h>

Oops, thought I'd changed that one already.

>  
>  #include "util.h"
>  #include "ip.h"
> @@ -59,22 +59,10 @@ static_assert(MSS6 <= sizeof(tcp_payload[0].data), "MSS6 is greater than 65516")
>  static struct tcp_tap_conn *tcp_frame_conns[TCP_FRAMES_MEM];
>  static unsigned int tcp_payload_used;
>  
> -static struct tap_hdr		tcp_flags_tap_hdr[TCP_FRAMES_MEM];
> -/* IPv4 headers for TCP segment without payload */
> -static struct iphdr		tcp4_flags_ip[TCP_FRAMES_MEM];
> -/* TCP segments without payload for IPv4 frames */
> -static struct tcp_flags_t	tcp_flags[TCP_FRAMES_MEM];
> -
> -static unsigned int tcp_flags_used;
> -
> -/* IPv6 headers for TCP segment without payload */
> -static struct ipv6hdr		tcp6_flags_ip[TCP_FRAMES_MEM];
> -
>  /* recvmsg()/sendmsg() data for tap */
>  static struct iovec	iov_sock		[TCP_FRAMES_MEM + 1];
>  
>  static struct iovec	tcp_l2_iov[TCP_FRAMES_MEM][TCP_NUM_IOVS];
> -static struct iovec	tcp_l2_flags_iov[TCP_FRAMES_MEM][TCP_NUM_IOVS];
>  
>  /**
>   * tcp_update_l2_buf() - Update Ethernet header buffers with addresses
> @@ -103,15 +91,6 @@ void tcp_sock_iov_init(const struct ctx *c)
>  	for (i = 0; i < ARRAY_SIZE(tcp_payload); i++) {
>  		tcp6_payload_ip[i] = ip6;
>  		tcp4_payload_ip[i] = iph;
> -		tcp_payload[i].th.doff = sizeof(struct tcphdr) / 4;
> -		tcp_payload[i].th.ack = 1;
> -	}
> -
> -	for (i = 0; i < ARRAY_SIZE(tcp_flags); i++) {
> -		tcp6_flags_ip[i] = ip6;
> -		tcp4_flags_ip[i] = iph;
> -		tcp_flags[i].th.doff = sizeof(struct tcphdr) / 4;
> -		tcp_flags[i].th.ack = 1;
>  	}
>  
>  	for (i = 0; i < TCP_FRAMES_MEM; i++) {
> @@ -121,25 +100,6 @@ void tcp_sock_iov_init(const struct ctx *c)
>  		iov[TCP_IOV_ETH].iov_len = sizeof(struct ethhdr);
>  		iov[TCP_IOV_PAYLOAD].iov_base = &tcp_payload[i];
>  	}
> -
> -	for (i = 0; i < TCP_FRAMES_MEM; i++) {
> -		struct iovec *iov = tcp_l2_flags_iov[i];
> -
> -		iov[TCP_IOV_TAP] = tap_hdr_iov(c, &tcp_flags_tap_hdr[i]);
> -		iov[TCP_IOV_ETH].iov_len = sizeof(struct ethhdr);
> -		iov[TCP_IOV_PAYLOAD].iov_base = &tcp_flags[i];
> -	}
> -}
> -
> -/**
> - * tcp_flags_flush() - Send out buffers for segments with no data (flags)
> - * @c:		Execution context
> - */
> -void tcp_flags_flush(const struct ctx *c)
> -{
> -	tap_send_frames(c, &tcp_l2_flags_iov[0][0], TCP_NUM_IOVS,
> -			tcp_flags_used);
> -	tcp_flags_used = 0;
>  }
>  
>  /**
> @@ -171,7 +131,7 @@ static void tcp_revert_seq(const struct ctx *c, struct tcp_tap_conn **conns,
>  }
>  
>  /**
> - * tcp_payload_flush() - Send out buffers for segments with data
> + * tcp_payload_flush() - Send out buffers for segments with data or flags
>   * @c:		Execution context
>   */
>  void tcp_payload_flush(const struct ctx *c)
> @@ -197,37 +157,35 @@ void tcp_payload_flush(const struct ctx *c)
>   */
>  int tcp_buf_send_flag(const struct ctx *c, struct tcp_tap_conn *conn, int flags)
>  {
> -	struct tcp_flags_t *payload;
> +	struct tcp_payload_t *payload;
>  	struct iovec *iov;
>  	size_t optlen;
>  	size_t l4len;
>  	uint32_t seq;
>  	int ret;
>  
> -	iov = tcp_l2_flags_iov[tcp_flags_used];
> +	iov = tcp_l2_iov[tcp_payload_used];
>  	if (CONN_V4(conn)) {
> -		iov[TCP_IOV_IP] = IOV_OF_LVALUE(tcp4_flags_ip[tcp_flags_used]);
> +		iov[TCP_IOV_IP] = IOV_OF_LVALUE(tcp4_payload_ip[tcp_payload_used]);
>  		iov[TCP_IOV_ETH].iov_base = &tcp4_eth_src;
>  	} else {
> -		iov[TCP_IOV_IP] = IOV_OF_LVALUE(tcp6_flags_ip[tcp_flags_used]);
> +		iov[TCP_IOV_IP] = IOV_OF_LVALUE(tcp6_payload_ip[tcp_payload_used]);
>  		iov[TCP_IOV_ETH].iov_base = &tcp6_eth_src;
>  	}
>  
>  	payload = iov[TCP_IOV_PAYLOAD].iov_base;
>  	seq = conn->seq_to_tap;
>  	ret = tcp_prepare_flags(c, conn, flags, &payload->th,
> -				&payload->opts, &optlen);
> +				(struct tcp_syn_opts *)&payload->data, &optlen);
>  	if (ret <= 0)
>  		return ret;
>  
> -	tcp_flags_used++;
> +	tcp_payload_used++;
>  	l4len = tcp_l2_buf_fill_headers(conn, iov, optlen, NULL, seq, false);
>  	iov[TCP_IOV_PAYLOAD].iov_len = l4len;
> -
>  	if (flags & DUP_ACK) {
> -		struct iovec *dup_iov;
> +		struct iovec *dup_iov = tcp_l2_iov[tcp_payload_used++];
>  
> -		dup_iov = tcp_l2_flags_iov[tcp_flags_used++];
>  		memcpy(dup_iov[TCP_IOV_TAP].iov_base, iov[TCP_IOV_TAP].iov_base,
>  		       iov[TCP_IOV_TAP].iov_len);
>  		dup_iov[TCP_IOV_ETH].iov_base = iov[TCP_IOV_ETH].iov_base;
> @@ -237,8 +195,8 @@ int tcp_buf_send_flag(const struct ctx *c, struct tcp_tap_conn *conn, int flags)
>  		dup_iov[TCP_IOV_PAYLOAD].iov_len = l4len;
>  	}
>  
> -	if (tcp_flags_used > TCP_FRAMES_MEM - 2)
> -		tcp_flags_flush(c);
> +	if (tcp_payload_used > TCP_FRAMES_MEM - 2)
> +		tcp_payload_flush(c);
>  
>  	return 0;
>  }
> @@ -254,6 +212,7 @@ int tcp_buf_send_flag(const struct ctx *c, struct tcp_tap_conn *conn, int flags)
>  static void tcp_data_to_tap(const struct ctx *c, struct tcp_tap_conn *conn,
>  			    ssize_t dlen, int no_csum, uint32_t seq)
>  {
> +	struct tcp_payload_t *payload;
>  	const uint16_t *check = NULL;
>  	struct iovec *iov;
>  	size_t l4len;
> @@ -274,6 +233,11 @@ static void tcp_data_to_tap(const struct ctx *c, struct tcp_tap_conn *conn,
>  		iov[TCP_IOV_IP] = IOV_OF_LVALUE(tcp6_payload_ip[tcp_payload_used]);
>  		iov[TCP_IOV_ETH].iov_base = &tcp6_eth_src;
>  	}
> +	payload = iov[TCP_IOV_PAYLOAD].iov_base;
> +	payload->th.th_off = sizeof(struct tcphdr) / 4;
> +	payload->th.th_x2 = 0;
> +	payload->th.th_flags = 0;
> +	payload->th.ack = 1;
>  	l4len = tcp_l2_buf_fill_headers(conn, iov, dlen, check, seq, false);
>  	iov[TCP_IOV_PAYLOAD].iov_len = l4len;
>  	if (++tcp_payload_used > TCP_FRAMES_MEM - 1)
> diff --git a/tcp_buf.h b/tcp_buf.h
> index 49c04d4..54f5e53 100644
> --- a/tcp_buf.h
> +++ b/tcp_buf.h
> @@ -7,7 +7,6 @@
>  #define TCP_BUF_H
>  
>  void tcp_sock_iov_init(const struct ctx *c);
> -void tcp_flags_flush(const struct ctx *c);
>  void tcp_payload_flush(const struct ctx *c);
>  int tcp_buf_data_from_sock(const struct ctx *c, struct tcp_tap_conn *conn);
>  int tcp_buf_send_flag(const struct ctx *c, struct tcp_tap_conn *conn, int flags);
> diff --git a/tcp_internal.h b/tcp_internal.h
> index a5a47df..c846f60 100644
> --- a/tcp_internal.h
> +++ b/tcp_internal.h
> @@ -134,21 +134,6 @@ struct tcp_syn_opts {
>  		.ws = TCP_OPT_WS(ws_),			\
>  	})
>  
> -/**
> - * struct tcp_flags_t - TCP header and data to send zero-length
> - *                      segments (flags)
> - * @th:		TCP header
> - * @opts	TCP options
> - */
> -struct tcp_flags_t {
> -	struct tcphdr th;
> -	struct tcp_syn_opts opts;
> -#ifdef __AVX2__
> -} __attribute__ ((packed, aligned(32)));
> -#else
> -} __attribute__ ((packed, aligned(__alignof__(unsigned int))));
> -#endif
> -
>  extern char tcp_buf_discard [MAX_WINDOW];
>  
>  void conn_flag_do(const struct ctx *c, struct tcp_tap_conn *conn,

-- 
David Gibson (he or they)	| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you, not the other way
				| around.
http://www.ozlabs.org/~dgibson

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

  reply	other threads:[~2024-11-06  1:48 UTC|newest]

Thread overview: 3+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-11-06  1:07 [PATCH v3] tcp: unify payload and flags l2 frames array Jon Maloy
2024-11-06  1:47 ` David Gibson [this message]
2024-11-07 14:54 ` Stefano Brivio

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=ZyrKw18qALw2Q0jN@zatzit \
    --to=david@gibson.dropbear.id.au \
    --cc=dgibson@redhat.com \
    --cc=jmaloy@redhat.com \
    --cc=lvivier@redhat.com \
    --cc=passt-dev@passt.top \
    --cc=sbrivio@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
Code repositories for project(s) associated with this public inbox

	https://passt.top/passt

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for IMAP folder(s).