From mboxrd@z Thu Jan 1 00:00:00 1970 Received: from gandalf.ozlabs.org (mail.ozlabs.org [IPv6:2404:9400:2221:ea00::3]) by passt.top (Postfix) with ESMTPS id 202C25A0275 for ; Mon, 29 Jan 2024 05:36:06 +0100 (CET) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gibson.dropbear.id.au; s=202312; t=1706502959; bh=jDWFrWDzMh16iSr1hUIaXa/9tmqQWLjTOUnSk5KeJJM=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=Mne0TC9R0b8LAHemUCreKq7lNXxFrOeM1hqn4yAPAyKPfCWmXMisYKrbpXGoJbaOA 5aZi2etnKs0ctw1gsFtXy01Q5CBGs62jF2HDfaOSo2rtxmdNKXJYdhG13SiQc2FrCr S1xMicDSqcO4yTQu60V4yNXidUf9Xdwe3+JPUlc8UuXajmWdjhJVqMd2fQ7r0l9WeR rVwOwOxcwoCvtObrbsOaD13PREgaL3skmOrLLTDneoKvm1ycHpu37LlehBnN1UDGRL uSdFAQi9eZxKGYWBEN0SAxDnhtKagOS4MR+kGldQYD6NSDyemiNwD9arxJ/uNbx+sg X2yg9nMF9D7Nw== Received: by gandalf.ozlabs.org (Postfix, from userid 1007) id 4TNb8b5nD5z4x2P; Mon, 29 Jan 2024 15:35:59 +1100 (AEDT) From: David Gibson To: Stefano Brivio , passt-dev@passt.top Subject: [PATCH 06/16] tcp, tcp_splice: Helpers for getting sockets from the pools Date: Mon, 29 Jan 2024 15:35:47 +1100 Message-ID: <20240129043557.823451-7-david@gibson.dropbear.id.au> X-Mailer: git-send-email 2.43.0 In-Reply-To: <20240129043557.823451-1-david@gibson.dropbear.id.au> References: <20240129043557.823451-1-david@gibson.dropbear.id.au> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit Message-ID-Hash: UH64W5GQOZ5PWJIAKURGYERVJPOYN2DQ X-Message-ID-Hash: UH64W5GQOZ5PWJIAKURGYERVJPOYN2DQ X-MailFrom: dgibson@gandalf.ozlabs.org X-Mailman-Rule-Misses: dmarc-mitigation; no-senders; approved; emergency; loop; banned-address; member-moderation; nonmember-moderation; administrivia; implicit-dest; max-recipients; max-size; news-moderation; no-subject; digests; suspicious-header CC: David Gibson X-Mailman-Version: 3.3.8 Precedence: list List-Id: Development discussion and patches for passt Archived-At: Archived-At: List-Archive: List-Archive: List-Help: List-Owner: List-Post: List-Subscribe: List-Unsubscribe: We maintain pools of ready-to-connect sockets in both the original and (for pasta) guest namespace to reduce latency when starting new TCP connections. If we exhaust those pools we have to take a higher latency path to get a new socket. Currently we open-code that fallback in the places we need it. To improve clarity encapsulate that into helper functions. Signed-off-by: David Gibson --- tcp.c | 29 ++++++++++++++++++++++++----- tcp_conn.h | 2 +- tcp_splice.c | 46 +++++++++++++++++++++++++--------------------- 3 files changed, 50 insertions(+), 27 deletions(-) diff --git a/tcp.c b/tcp.c index cd833728..8daefe99 100644 --- a/tcp.c +++ b/tcp.c @@ -1792,7 +1792,7 @@ int tcp_conn_pool_sock(int pool[]) * * Return: socket number on success, negative code if socket creation failed */ -int tcp_conn_new_sock(const struct ctx *c, sa_family_t af) +static int tcp_conn_new_sock(const struct ctx *c, sa_family_t af) { int s; @@ -1811,6 +1811,27 @@ int tcp_conn_new_sock(const struct ctx *c, sa_family_t af) return s; } +/** + * tcp_conn_sock() - Obtain a connectable socket in the host/init namespace + * @c: Execution context + * @af: Address family (AF_INET or AF_INET6) + * + * Return: Socket fd on success, -errno on failure + */ +int tcp_conn_sock(const struct ctx *c, sa_family_t af) +{ + int *pool = af == AF_INET6 ? init_sock_pool6 : init_sock_pool4; + int s; + + if ((s = tcp_conn_pool_sock(pool)) >= 0) + return s; + + /* If the pool is empty we just open a new one without refilling the + * pool to keep latency down. + */ + return tcp_conn_new_sock(c, af); +} + /** * tcp_conn_tap_mss() - Get MSS value advertised by tap/guest * @conn: Connection pointer @@ -1909,7 +1930,6 @@ static void tcp_conn_from_tap(struct ctx *c, sa_family_t af, const struct tcphdr *th, const char *opts, size_t optlen, const struct timespec *now) { - int *pool = af == AF_INET6 ? init_sock_pool6 : init_sock_pool4; struct sockaddr_in addr4 = { .sin_family = AF_INET, .sin_port = th->dest, @@ -1931,9 +1951,8 @@ static void tcp_conn_from_tap(struct ctx *c, sa_family_t af, if (!(flow = flow_alloc())) return; - if ((s = tcp_conn_pool_sock(pool)) < 0) - if ((s = tcp_conn_new_sock(c, af)) < 0) - goto cancel; + if ((s = tcp_conn_sock(c, af)) < 0) + goto cancel; if (!c->no_map_gw) { if (af == AF_INET && IN4_ARE_ADDR_EQUAL(daddr, &c->ip4.gw)) diff --git a/tcp_conn.h b/tcp_conn.h index 20c7cb8b..e55edafe 100644 --- a/tcp_conn.h +++ b/tcp_conn.h @@ -159,7 +159,7 @@ bool tcp_flow_defer(union flow *flow); bool tcp_splice_flow_defer(union flow *flow); void tcp_splice_timer(const struct ctx *c, union flow *flow); int tcp_conn_pool_sock(int pool[]); -int tcp_conn_new_sock(const struct ctx *c, sa_family_t af); +int tcp_conn_sock(const struct ctx *c, sa_family_t af); void tcp_sock_refill_pool(const struct ctx *c, int pool[], sa_family_t af); void tcp_splice_refill(const struct ctx *c); diff --git a/tcp_splice.c b/tcp_splice.c index fbce4879..16be0c59 100644 --- a/tcp_splice.c +++ b/tcp_splice.c @@ -90,7 +90,7 @@ static const char *tcp_splice_flag_str[] __attribute((__unused__)) = { }; /* Forward declaration */ -static int tcp_sock_refill_ns(void *arg); +static int tcp_conn_sock_ns(const struct ctx *c, sa_family_t af); /** * tcp_splice_conn_epoll_events() - epoll events masks for given state @@ -380,36 +380,19 @@ static int tcp_splice_connect(const struct ctx *c, struct tcp_splice_conn *conn, static int tcp_splice_new(const struct ctx *c, struct tcp_splice_conn *conn, in_port_t port, uint8_t pif) { + sa_family_t af = CONN_V6(conn) ? AF_INET6 : AF_INET; int s = -1; - /* If the pool is empty we take slightly different approaches - * for init or ns sockets. For init sockets we just open a - * new one without refilling the pool to keep latency down. - * For ns sockets, we're going to incur the latency of - * entering the ns anyway, so we might as well refill the - * pool. - */ if (pif == PIF_SPLICE) { - int *p = CONN_V6(conn) ? init_sock_pool6 : init_sock_pool4; - sa_family_t af = CONN_V6(conn) ? AF_INET6 : AF_INET; - port += c->tcp.fwd_out.delta[port]; - s = tcp_conn_pool_sock(p); - if (s < 0) - s = tcp_conn_new_sock(c, af); + s = tcp_conn_sock(c, af); } else { - int *p = CONN_V6(conn) ? ns_sock_pool6 : ns_sock_pool4; - ASSERT(pif == PIF_HOST); port += c->tcp.fwd_in.delta[port]; - /* If pool is empty, refill it first */ - if (p[TCP_SOCK_POOL_SIZE-1] < 0) - NS_CALL(tcp_sock_refill_ns, c); - - s = tcp_conn_pool_sock(p); + s = tcp_conn_sock_ns(c, af); } if (s < 0) { @@ -709,6 +692,27 @@ static int tcp_sock_refill_ns(void *arg) return 0; } +/** + * tcp_conn_sock_ns() - Obtain a connectable socket in the namespace + * @c: Execution context + * @af: Address family (AF_INET or AF_INET6) + * + * Return: Socket fd in the namespace on success, -errno on failure + */ +static int tcp_conn_sock_ns(const struct ctx *c, sa_family_t af) +{ + int *p = af == AF_INET6 ? ns_sock_pool6 : ns_sock_pool4; + + /* If the pool is empty we have to incur the latency of entering the ns. + * Therefore, we might as well refill the whole pool while we're at it, + * which differs from tcp_conn_sock(). + */ + if (p[TCP_SOCK_POOL_SIZE-1] < 0) + NS_CALL(tcp_sock_refill_ns, c); + + return tcp_conn_pool_sock(p); +} + /** * tcp_splice_refill() - Refill pools of resources needed for splicing * @c: Execution context -- 2.43.0