From mboxrd@z Thu Jan 1 00:00:00 1970 Received: from gandalf.ozlabs.org (mail.ozlabs.org [IPv6:2404:9400:2221:ea00::3]) by passt.top (Postfix) with ESMTPS id 0ECF75A0275 for ; Mon, 19 Feb 2024 08:57:03 +0100 (CET) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gibson.dropbear.id.au; s=202312; t=1708329414; bh=tj1YfsCw8eByKWcNpdiiZMw6hebVOLeyxeaS5E0Qm5M=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=DmhCY5MR3kwnRh+IqlkDjIjuKr5aqw4Ty+QbaijF2wAV1TsVg6bevrDCamvw7ps4Q P6/WTaczdVVKMl2S+lJIBYQPVx+aQz8Z4LzjWNqgjBheGemnHjvLXSF3rrGuVbyvoV czgIPR+j/yRXdFwlE+SAlSZmF0eyjx4B6d1dCVw9YgZ8re7e50XiPCAtzoMWi1YyOr f16wKE5VwDBAU/P7RHDpZ9qc75QBLwokJXm+Fhr3tmVCmotKk1UPf9oC+K6jazfuuK sFj0MJZ9nNiODraNAgBfuVWdO4hLiLz0xYpvj/q84PhC9tCiRqcw6hRor5R5vyLfNt GJ4cE/J3P1dhQ== Received: by gandalf.ozlabs.org (Postfix, from userid 1007) id 4TdZck3bXqz4x1m; Mon, 19 Feb 2024 18:56:54 +1100 (AEDT) From: David Gibson To: Stefano Brivio , passt-dev@passt.top Subject: [PATCH 5/6] tcp, tcp_splice: Helpers for getting sockets from the pools Date: Mon, 19 Feb 2024 18:56:50 +1100 Message-ID: <20240219075651.1360229-6-david@gibson.dropbear.id.au> X-Mailer: git-send-email 2.43.2 In-Reply-To: <20240219075651.1360229-1-david@gibson.dropbear.id.au> References: <20240219075651.1360229-1-david@gibson.dropbear.id.au> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit Message-ID-Hash: WC4AGSA4G3URDGPO2ATTA7LQYFDWEXWH X-Message-ID-Hash: WC4AGSA4G3URDGPO2ATTA7LQYFDWEXWH X-MailFrom: dgibson@gandalf.ozlabs.org X-Mailman-Rule-Misses: dmarc-mitigation; no-senders; approved; emergency; loop; banned-address; member-moderation; nonmember-moderation; administrivia; implicit-dest; max-recipients; max-size; news-moderation; no-subject; digests; suspicious-header CC: David Gibson X-Mailman-Version: 3.3.8 Precedence: list List-Id: Development discussion and patches for passt Archived-At: Archived-At: List-Archive: List-Archive: List-Help: List-Owner: List-Post: List-Subscribe: List-Unsubscribe: We maintain pools 
of ready-to-connect sockets in both the original and (for pasta) guest namespace to reduce latency when starting new TCP connections. If we exhaust those pools we have to take a higher latency path to get a new socket. Currently we open-code that fallback in the places we need it. To improve clarity encapsulate that into helper functions. While we're at it, give those helpers clearer error reporting. Signed-off-by: David Gibson --- tcp.c | 34 +++++++++++++++++++++++++++----- tcp_conn.h | 2 +- tcp_splice.c | 55 ++++++++++++++++++++++++++++++---------------------- 3 files changed, 62 insertions(+), 29 deletions(-) diff --git a/tcp.c b/tcp.c index ad56ffc3..34e32641 100644 --- a/tcp.c +++ b/tcp.c @@ -1792,7 +1792,7 @@ int tcp_conn_pool_sock(int pool[]) * * Return: socket number on success, negative code if socket creation failed */ -int tcp_conn_new_sock(const struct ctx *c, sa_family_t af) +static int tcp_conn_new_sock(const struct ctx *c, sa_family_t af) { int s; @@ -1811,6 +1811,32 @@ int tcp_conn_new_sock(const struct ctx *c, sa_family_t af) return s; } +/** + * tcp_conn_sock() - Obtain a connectable socket in the host/init namespace + * @c: Execution context + * @af: Address family (AF_INET or AF_INET6) + * + * Return: Socket fd on success, -1 on failure (the error is logged) + */ +int tcp_conn_sock(const struct ctx *c, sa_family_t af) +{ + int *pool = af == AF_INET6 ? init_sock_pool6 : init_sock_pool4; + int s; + + if ((s = tcp_conn_pool_sock(pool)) >= 0) + return s; + + /* If the pool is empty we just open a new one without refilling the + * pool to keep latency down. 
+ */ + if ((s = tcp_conn_new_sock(c, af)) >= 0) + return s; + + err("TCP: Unable to open socket for new connection: %s", + strerror(-s)); + return -1; +} + /** * tcp_conn_tap_mss() - Get MSS value advertised by tap/guest * @conn: Connection pointer @@ -1909,7 +1935,6 @@ static void tcp_conn_from_tap(struct ctx *c, sa_family_t af, const struct tcphdr *th, const char *opts, size_t optlen, const struct timespec *now) { - int *pool = af == AF_INET6 ? init_sock_pool6 : init_sock_pool4; struct sockaddr_in addr4 = { .sin_family = AF_INET, .sin_port = th->dest, @@ -1931,9 +1956,8 @@ static void tcp_conn_from_tap(struct ctx *c, sa_family_t af, if (!(flow = flow_alloc())) return; - if ((s = tcp_conn_pool_sock(pool)) < 0) - if ((s = tcp_conn_new_sock(c, af)) < 0) - goto cancel; + if ((s = tcp_conn_sock(c, af)) < 0) + goto cancel; if (!c->no_map_gw) { if (af == AF_INET && IN4_ARE_ADDR_EQUAL(daddr, &c->ip4.gw)) diff --git a/tcp_conn.h b/tcp_conn.h index 92d4807a..d280b222 100644 --- a/tcp_conn.h +++ b/tcp_conn.h @@ -159,7 +159,7 @@ bool tcp_flow_defer(union flow *flow); bool tcp_splice_flow_defer(union flow *flow); void tcp_splice_timer(const struct ctx *c, union flow *flow); int tcp_conn_pool_sock(int pool[]); -int tcp_conn_new_sock(const struct ctx *c, sa_family_t af); +int tcp_conn_sock(const struct ctx *c, sa_family_t af); int tcp_sock_refill_pool(const struct ctx *c, int pool[], sa_family_t af); void tcp_splice_refill(const struct ctx *c); diff --git a/tcp_splice.c b/tcp_splice.c index ee68029b..5b38a82d 100644 --- a/tcp_splice.c +++ b/tcp_splice.c @@ -376,6 +376,34 @@ static int tcp_splice_connect(const struct ctx *c, struct tcp_splice_conn *conn, return 0; } +/** + * tcp_conn_sock_ns() - Obtain a connectable socket in the namespace + * @c: Execution context + * @af: Address family (AF_INET or AF_INET6) + * + * Return: Socket fd in the namespace on success, -1 on failure (the error is logged) + */ +static int tcp_conn_sock_ns(const struct ctx *c, sa_family_t af) +{ + int *p = af == 
AF_INET6 ? ns_sock_pool6 : ns_sock_pool4; + int s; + + if ((s = tcp_conn_pool_sock(p)) >= 0) + return s; + + /* If the pool is empty we have to incur the latency of entering the ns. + * Therefore, we might as well refill the whole pool while we're at it. + * This differs from tcp_conn_sock(). + */ + NS_CALL(tcp_sock_refill_ns, c); + + if ((s = tcp_conn_pool_sock(p)) >= 0) + return s; + + err("TCP: No available ns sockets for new connection"); + return -1; +} + /** * tcp_splice_new() - Handle new spliced connection * @c: Execution context @@ -388,38 +416,19 @@ static int tcp_splice_connect(const struct ctx *c, struct tcp_splice_conn *conn, static int tcp_splice_new(const struct ctx *c, struct tcp_splice_conn *conn, in_port_t port, uint8_t pif) { + sa_family_t af = CONN_V6(conn) ? AF_INET6 : AF_INET; int s = -1; - /* If the pool is empty we take slightly different approaches - * for init or ns sockets. For init sockets we just open a - * new one without refilling the pool to keep latency down. - * For ns sockets, we're going to incur the latency of - * entering the ns anyway, so we might as well refill the - * pool. - */ if (pif == PIF_SPLICE) { - int *p = CONN_V6(conn) ? init_sock_pool6 : init_sock_pool4; - sa_family_t af = CONN_V6(conn) ? AF_INET6 : AF_INET; - - s = tcp_conn_pool_sock(p); - if (s < 0) - s = tcp_conn_new_sock(c, af); + s = tcp_conn_sock(c, af); } else { - int *p = CONN_V6(conn) ? ns_sock_pool6 : ns_sock_pool4; - ASSERT(pif == PIF_HOST); - /* If pool is empty, refill it first */ - if (p[TCP_SOCK_POOL_SIZE-1] < 0) - NS_CALL(tcp_sock_refill_ns, c); - - s = tcp_conn_pool_sock(p); + s = tcp_conn_sock_ns(c, af); } - if (s < 0) { - warn("Couldn't open connectable socket for splice (%d)", s); + if (s < 0) return s; - } return tcp_splice_connect(c, conn, s, port); } -- 2.43.2