On Sun, Feb 18, 2024 at 10:00:29PM +0100, Stefano Brivio wrote: > On Tue, 6 Feb 2024 12:17:24 +1100 > David Gibson wrote: > > > We maintain pools of ready-to-connect sockets in both the original and > > (for pasta) guest namespace to reduce latency when starting new TCP > > connections. If we exhaust those pools we have to take a higher > > latency path to get a new socket. > > > > Currently we open-code that fallback in the places we need it. To > > improve clarity encapsulate that into helper functions. > > > > Signed-off-by: David Gibson > > --- > > tcp.c | 29 ++++++++++++++++++++++++----- > > tcp_conn.h | 2 +- > > tcp_splice.c | 46 +++++++++++++++++++++++++--------------------- > > 3 files changed, 50 insertions(+), 27 deletions(-) > > > > diff --git a/tcp.c b/tcp.c > > index e15b932f..c06d1cc4 100644 > > --- a/tcp.c > > +++ b/tcp.c > > @@ -1792,7 +1792,7 @@ int tcp_conn_pool_sock(int pool[]) > > * > > * Return: socket number on success, negative code if socket creation failed > > */ > > -int tcp_conn_new_sock(const struct ctx *c, sa_family_t af) > > +static int tcp_conn_new_sock(const struct ctx *c, sa_family_t af) > > { > > int s; > > > > @@ -1811,6 +1811,27 @@ int tcp_conn_new_sock(const struct ctx *c, sa_family_t af) > > return s; > > } > > > > +/** > > + * tcp_conn_sock() - Obtain a connectable socket in the host/init namespace > > + * @c: Execution context > > + * @af: Address family (AF_INET or AF_INET6) > > + * > > + * Return: Socket fd on success, -errno on failure > > + */ > > +int tcp_conn_sock(const struct ctx *c, sa_family_t af) > > +{ > > + int *pool = af == AF_INET6 ? init_sock_pool6 : init_sock_pool4; > > + int s; > > + > > + if ((s = tcp_conn_pool_sock(pool)) >= 0) > > + return s; > > + > > + /* If the pool is empty we just open a new one without refilling the > > + * pool to keep latency down. > > + */ > > + return tcp_conn_new_sock(c, af); > > +} > > + > > /** > > * tcp_conn_tap_mss() - Get MSS value advertised by tap/guest > > * @conn: Connection pointer > > @@ -1909,7 +1930,6 @@ static void tcp_conn_from_tap(struct ctx *c, sa_family_t af, > > const struct tcphdr *th, const char *opts, > > size_t optlen, const struct timespec *now) > > { > > - int *pool = af == AF_INET6 ? init_sock_pool6 : init_sock_pool4; > > struct sockaddr_in addr4 = { > > .sin_family = AF_INET, > > .sin_port = th->dest, > > @@ -1931,9 +1951,8 @@ static void tcp_conn_from_tap(struct ctx *c, sa_family_t af, > > if (!(flow = flow_alloc())) > > return; > > > > - if ((s = tcp_conn_pool_sock(pool)) < 0) > > - if ((s = tcp_conn_new_sock(c, af)) < 0) > > - goto cancel; > > + if ((s = tcp_conn_sock(c, af)) < 0) > > + goto cancel; > > > > if (!c->no_map_gw) { > > if (af == AF_INET && IN4_ARE_ADDR_EQUAL(daddr, &c->ip4.gw)) > > diff --git a/tcp_conn.h b/tcp_conn.h > > index 20c7cb8b..e55edafe 100644 > > --- a/tcp_conn.h > > +++ b/tcp_conn.h > > @@ -159,7 +159,7 @@ bool tcp_flow_defer(union flow *flow); > > bool tcp_splice_flow_defer(union flow *flow); > > void tcp_splice_timer(const struct ctx *c, union flow *flow); > > int tcp_conn_pool_sock(int pool[]); > > -int tcp_conn_new_sock(const struct ctx *c, sa_family_t af); > > +int tcp_conn_sock(const struct ctx *c, sa_family_t af); > > void tcp_sock_refill_pool(const struct ctx *c, int pool[], sa_family_t af); > > void tcp_splice_refill(const struct ctx *c); > > > > diff --git a/tcp_splice.c b/tcp_splice.c > > index 576fe9be..609f3242 100644 > > --- a/tcp_splice.c > > +++ b/tcp_splice.c > > @@ -90,7 +90,7 @@ static const char *tcp_splice_flag_str[] __attribute((__unused__)) = { > > }; > > > > /* Forward declaration */ > > -static int tcp_sock_refill_ns(void *arg); > > +static int tcp_conn_sock_ns(const struct ctx *c, sa_family_t af); > > > > /** > > * tcp_splice_conn_epoll_events() - epoll events masks for given state > > @@ -380,36 +380,19 @@ static int tcp_splice_connect(const struct ctx *c, struct tcp_splice_conn *conn, > > static int tcp_splice_new(const struct ctx *c, struct tcp_splice_conn *conn, > > in_port_t port, uint8_t pif) > > { > > + sa_family_t af = CONN_V6(conn) ? AF_INET6 : AF_INET; > > int s = -1; > > > > - /* If the pool is empty we take slightly different approaches > > - * for init or ns sockets. For init sockets we just open a > > - * new one without refilling the pool to keep latency down. > > - * For ns sockets, we're going to incur the latency of > > - * entering the ns anyway, so we might as well refill the > > - * pool. > > - */ > > if (pif == PIF_SPLICE) { > > - int *p = CONN_V6(conn) ? init_sock_pool6 : init_sock_pool4; > > - sa_family_t af = CONN_V6(conn) ? AF_INET6 : AF_INET; > > - > > port += c->tcp.fwd_out.delta[port]; > > > > - s = tcp_conn_pool_sock(p); > > - if (s < 0) > > - s = tcp_conn_new_sock(c, af); > > + s = tcp_conn_sock(c, af); > > } else { > > - int *p = CONN_V6(conn) ? ns_sock_pool6 : ns_sock_pool4; > > - > > ASSERT(pif == PIF_HOST); > > > > port += c->tcp.fwd_in.delta[port]; > > > > - /* If pool is empty, refill it first */ > > - if (p[TCP_SOCK_POOL_SIZE-1] < 0) > > - NS_CALL(tcp_sock_refill_ns, c); > > - > > - s = tcp_conn_pool_sock(p); > > + s = tcp_conn_sock_ns(c, af); > > } > > > > if (s < 0) { > > @@ -709,6 +692,27 @@ static int tcp_sock_refill_ns(void *arg) > > return 0; > > } > > > > +/** > > + * tcp_conn_sock_ns() - Obtain a connectable socket in the namespace > > + * @c: Execution context > > + * @af: Address family (AF_INET or AF_INET6) > > + * > > + * Return: Socket fd in the namespace on success, -errno on failure > > + */ > > +static int tcp_conn_sock_ns(const struct ctx *c, sa_family_t af) > > +{ > > + int *p = af == AF_INET6 ? ns_sock_pool6 : ns_sock_pool4; > > + > > + /* If the pool is empty we have to incur the latency of entering the ns. > > + * Therefore, we might as well refill the whole pool while we're at it, > > + * which differs from tcp_conn_sock(). > > + */ > > + if (p[TCP_SOCK_POOL_SIZE-1] < 0) > > Nit, for consistency (but yes, it was already like this): > > if (p[TCP_SOCK_POOL_SIZE - 1] < 0) Adjusted, thanks. -- David Gibson | I'll have my music baroque, and my code david AT gibson.dropbear.id.au | minimalist, thank you. NOT _the_ _other_ | _way_ _around_! http://www.ozlabs.org/~dgibson