public inbox for passt-dev@passt.top
 help / color / mirror / code / Atom feed
From: David Gibson <david@gibson.dropbear.id.au>
To: Stefano Brivio <sbrivio@redhat.com>
Cc: passt-dev@passt.top
Subject: Re: [PATCH v2 12/22] tcp, tcp_splice: Helpers for getting sockets from the pools
Date: Mon, 19 Feb 2024 12:51:50 +1100	[thread overview]
Message-ID: <ZdK0Nplwhj_Ud7D9@zatzit> (raw)
In-Reply-To: <20240218220029.57acacf8@elisabeth>

[-- Attachment #1: Type: text/plain, Size: 6636 bytes --]

On Sun, Feb 18, 2024 at 10:00:29PM +0100, Stefano Brivio wrote:
> On Tue,  6 Feb 2024 12:17:24 +1100
> David Gibson <david@gibson.dropbear.id.au> wrote:
> 
> > We maintain pools of ready-to-connect sockets in both the original and
> > (for pasta) guest namespace to reduce latency when starting new TCP
> > connections.  If we exhaust those pools we have to take a higher
> > latency path to get a new socket.
> > 
> > Currently we open-code that fallback in the places we need it.  To
> > improve clarity encapsulate that into helper functions.
> > 
> > Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
> > ---
> >  tcp.c        | 29 ++++++++++++++++++++++++-----
> >  tcp_conn.h   |  2 +-
> >  tcp_splice.c | 46 +++++++++++++++++++++++++---------------------
> >  3 files changed, 50 insertions(+), 27 deletions(-)
> > 
> > diff --git a/tcp.c b/tcp.c
> > index e15b932f..c06d1cc4 100644
> > --- a/tcp.c
> > +++ b/tcp.c
> > @@ -1792,7 +1792,7 @@ int tcp_conn_pool_sock(int pool[])
> >   *
> >   * Return: socket number on success, negative code if socket creation failed
> >   */
> > -int tcp_conn_new_sock(const struct ctx *c, sa_family_t af)
> > +static int tcp_conn_new_sock(const struct ctx *c, sa_family_t af)
> >  {
> >  	int s;
> >  
> > @@ -1811,6 +1811,27 @@ int tcp_conn_new_sock(const struct ctx *c, sa_family_t af)
> >  	return s;
> >  }
> >  
> > +/**
> > + * tcp_conn_sock() - Obtain a connectable socket in the host/init namespace
> > + * @c:		Execution context
> > + * @af:		Address family (AF_INET or AF_INET6)
> > + *
> > + * Return: Socket fd on success, -errno on failure
> > + */
> > +int tcp_conn_sock(const struct ctx *c, sa_family_t af)
> > +{
> > +	int *pool = af == AF_INET6 ? init_sock_pool6 : init_sock_pool4;
> > +	int s;
> > +
> > +	if ((s = tcp_conn_pool_sock(pool)) >= 0)
> > +		return s;
> > +
> > +	/* If the pool is empty we just open a new one without refilling the
> > +	 * pool to keep latency down.
> > +	 */
> > +	return tcp_conn_new_sock(c, af);
> > +}
> > +
> >  /**
> >   * tcp_conn_tap_mss() - Get MSS value advertised by tap/guest
> >   * @conn:	Connection pointer
> > @@ -1909,7 +1930,6 @@ static void tcp_conn_from_tap(struct ctx *c, sa_family_t af,
> >  			      const struct tcphdr *th, const char *opts,
> >  			      size_t optlen, const struct timespec *now)
> >  {
> > -	int *pool = af == AF_INET6 ? init_sock_pool6 : init_sock_pool4;
> >  	struct sockaddr_in addr4 = {
> >  		.sin_family = AF_INET,
> >  		.sin_port = th->dest,
> > @@ -1931,9 +1951,8 @@ static void tcp_conn_from_tap(struct ctx *c, sa_family_t af,
> >  	if (!(flow = flow_alloc()))
> >  		return;
> >  
> > -	if ((s = tcp_conn_pool_sock(pool)) < 0)
> > -		if ((s = tcp_conn_new_sock(c, af)) < 0)
> > -			goto cancel;
> > +	if ((s = tcp_conn_sock(c, af)) < 0)
> > +		goto cancel;
> >  
> >  	if (!c->no_map_gw) {
> >  		if (af == AF_INET && IN4_ARE_ADDR_EQUAL(daddr, &c->ip4.gw))
> > diff --git a/tcp_conn.h b/tcp_conn.h
> > index 20c7cb8b..e55edafe 100644
> > --- a/tcp_conn.h
> > +++ b/tcp_conn.h
> > @@ -159,7 +159,7 @@ bool tcp_flow_defer(union flow *flow);
> >  bool tcp_splice_flow_defer(union flow *flow);
> >  void tcp_splice_timer(const struct ctx *c, union flow *flow);
> >  int tcp_conn_pool_sock(int pool[]);
> > -int tcp_conn_new_sock(const struct ctx *c, sa_family_t af);
> > +int tcp_conn_sock(const struct ctx *c, sa_family_t af);
> >  void tcp_sock_refill_pool(const struct ctx *c, int pool[], sa_family_t af);
> >  void tcp_splice_refill(const struct ctx *c);
> >  
> > diff --git a/tcp_splice.c b/tcp_splice.c
> > index 576fe9be..609f3242 100644
> > --- a/tcp_splice.c
> > +++ b/tcp_splice.c
> > @@ -90,7 +90,7 @@ static const char *tcp_splice_flag_str[] __attribute((__unused__)) = {
> >  };
> >  
> >  /* Forward declaration */
> > -static int tcp_sock_refill_ns(void *arg);
> > +static int tcp_conn_sock_ns(const struct ctx *c, sa_family_t af);
> >  
> >  /**
> >   * tcp_splice_conn_epoll_events() - epoll events masks for given state
> > @@ -380,36 +380,19 @@ static int tcp_splice_connect(const struct ctx *c, struct tcp_splice_conn *conn,
> >  static int tcp_splice_new(const struct ctx *c, struct tcp_splice_conn *conn,
> >  			  in_port_t port, uint8_t pif)
> >  {
> > +	sa_family_t af = CONN_V6(conn) ? AF_INET6 : AF_INET;
> >  	int s = -1;
> >  
> > -	/* If the pool is empty we take slightly different approaches
> > -	 * for init or ns sockets.  For init sockets we just open a
> > -	 * new one without refilling the pool to keep latency down.
> > -	 * For ns sockets, we're going to incur the latency of
> > -	 * entering the ns anyway, so we might as well refill the
> > -	 * pool.
> > -	 */
> >  	if (pif == PIF_SPLICE) {
> > -		int *p = CONN_V6(conn) ? init_sock_pool6 : init_sock_pool4;
> > -		sa_family_t af = CONN_V6(conn) ? AF_INET6 : AF_INET;
> > -
> >  		port += c->tcp.fwd_out.delta[port];
> >  
> > -		s = tcp_conn_pool_sock(p);
> > -		if (s < 0)
> > -			s = tcp_conn_new_sock(c, af);
> > +		s = tcp_conn_sock(c, af);
> >  	} else {
> > -		int *p = CONN_V6(conn) ? ns_sock_pool6 : ns_sock_pool4;
> > -
> >  		ASSERT(pif == PIF_HOST);
> >  
> >  		port += c->tcp.fwd_in.delta[port];
> >  
> > -		/* If pool is empty, refill it first */
> > -		if (p[TCP_SOCK_POOL_SIZE-1] < 0)
> > -			NS_CALL(tcp_sock_refill_ns, c);
> > -
> > -		s = tcp_conn_pool_sock(p);
> > +		s = tcp_conn_sock_ns(c, af);
> >  	}
> >  
> >  	if (s < 0) {
> > @@ -709,6 +692,27 @@ static int tcp_sock_refill_ns(void *arg)
> >  	return 0;
> >  }
> >  
> > +/**
> > + * tcp_conn_sock_ns() - Obtain a connectable socket in the namespace
> > + * @c:		Execution context
> > + * @af:		Address family (AF_INET or AF_INET6)
> > + *
> > + * Return: Socket fd in the namespace on success, -errno on failure
> > + */
> > +static int tcp_conn_sock_ns(const struct ctx *c, sa_family_t af)
> > +{
> > +	int *p = af == AF_INET6 ? ns_sock_pool6 : ns_sock_pool4;
> > +
> > +	/* If the pool is empty we have to incur the latency of entering the ns.
> > +	 * Therefore, we might as well refill the whole pool while we're at it,
> > +	 * which differs from tcp_conn_sock().
> > +	 */
> > +	if (p[TCP_SOCK_POOL_SIZE-1] < 0)
> 
> Nit, for consistency (but yes, it was already like this):
> 
> 	if (p[TCP_SOCK_POOL_SIZE - 1] < 0)

Adjusted, thanks.

-- 
David Gibson			| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you.  NOT _the_ _other_
				| _way_ _around_!
http://www.ozlabs.org/~dgibson

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

  reply	other threads:[~2024-02-19  2:03 UTC|newest]

Thread overview: 38+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-02-06  1:17 [PATCH v2 00/22] More flow table preliminaries: address handling improvements David Gibson
2024-02-06  1:17 ` [PATCH v2 01/22] treewide: Use sa_family_t for address family variables David Gibson
2024-02-06  1:17 ` [PATCH v2 02/22] inany: Helper to test for various address types David Gibson
2024-02-18 20:58   ` Stefano Brivio
2024-02-19  1:48     ` David Gibson
2024-02-06  1:17 ` [PATCH v2 03/22] inany: Add inany_ntop() helper David Gibson
2024-02-06  1:17 ` [PATCH v2 04/22] inany: Provide more conveniently typed constants for special addresses David Gibson
2024-02-06  1:17 ` [PATCH v2 05/22] inany: Introduce union sockaddr_inany David Gibson
2024-02-06  1:17 ` [PATCH v2 06/22] util: Allow IN4_IS_* macros to operate on untyped addresses David Gibson
2024-02-06  1:17 ` [PATCH v2 07/22] tcp, udp: Don't precompute port remappings in epoll references David Gibson
2024-02-06  1:17 ` [PATCH v2 08/22] flow: Add helper to determine a flow's protocol David Gibson
2024-02-06  1:17 ` [PATCH v2 09/22] tcp_splice: Simplify clean up logic David Gibson
2024-02-18 20:59   ` Stefano Brivio
2024-02-19  1:50     ` David Gibson
2024-02-06  1:17 ` [PATCH v2 10/22] tcp_splice: Don't use flow_trace() before setting flow type David Gibson
2024-02-06  1:17 ` [PATCH v2 11/22] flow: Clarify flow entry life cycle, introduce uniform logging David Gibson
2024-02-18 21:00   ` Stefano Brivio
2024-02-19  1:58     ` David Gibson
2024-02-06  1:17 ` [PATCH v2 12/22] tcp, tcp_splice: Helpers for getting sockets from the pools David Gibson
2024-02-18 21:00   ` Stefano Brivio
2024-02-19  1:51     ` David Gibson [this message]
2024-02-06  1:17 ` [PATCH v2 13/22] tcp_splice: More specific variable names in new splice path David Gibson
2024-02-18 21:00   ` Stefano Brivio
2024-02-19  1:53     ` David Gibson
2024-02-06  1:17 ` [PATCH v2 14/22] tcp_splice: Merge tcp_splice_new() into its caller David Gibson
2024-02-06  1:17 ` [PATCH v2 15/22] tcp_splice: Make tcp_splice_connect() create its own sockets David Gibson
2024-02-06  1:17 ` [PATCH v2 16/22] tcp_splice: Improve error reporting on connect path David Gibson
2024-02-18 21:01   ` Stefano Brivio
2024-02-19  3:23     ` David Gibson
2024-02-06  1:17 ` [PATCH v2 17/22] tcp_splice: Improve logic deciding when to splice David Gibson
2024-02-06  1:17 ` [PATCH v2 18/22] tcp, tcp_splice: Parse listening socket epoll ref in tcp_listen_handler() David Gibson
2024-02-06  1:17 ` [PATCH v2 19/22] tcp: Validate TCP endpoint addresses David Gibson
2024-02-22 12:45   ` Stefano Brivio
2024-02-23  3:56     ` David Gibson
2024-02-06  1:17 ` [PATCH v2 20/22] tap: Disallow loopback addresses on tap interface David Gibson
2024-02-06  1:17 ` [PATCH v2 21/22] port_fwd: Fix copypasta error in port_fwd_scan_udp() comments David Gibson
2024-02-06  1:17 ` [PATCH v2 22/22] fwd: Rename port_fwd.[ch] and their contents David Gibson
2024-02-27 14:22 ` [PATCH v2 00/22] More flow table preliminaries: address handling improvements Stefano Brivio

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=ZdK0Nplwhj_Ud7D9@zatzit \
    --to=david@gibson.dropbear.id.au \
    --cc=passt-dev@passt.top \
    --cc=sbrivio@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link.

Be sure your reply has a Subject: header at the top and a blank line before the message body.
Code repositories for project(s) associated with this public inbox

	https://passt.top/passt

This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for IMAP folder(s).