From mboxrd@z Thu Jan 1 00:00:00 1970 Received: from gandalf.ozlabs.org (mail.ozlabs.org [IPv6:2404:9400:2221:ea00::3]) by passt.top (Postfix) with ESMTPS id 09AB15A026B for ; Thu, 17 Nov 2022 06:59:19 +0100 (CET) Received: by gandalf.ozlabs.org (Postfix, from userid 1007) id 4NCTkl2zgHz4xwl; Thu, 17 Nov 2022 16:59:11 +1100 (AEDT) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gibson.dropbear.id.au; s=201602; t=1668664751; bh=pOH3Y7GEJk9i6qaXVEUIjBddFPRSwenF/qoxMpQLYHo=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=B8Eh60R/VFW5PNuIyCkLIOD0NfVjs4a+sCwWlMjuDLHu9rIkqmTwGJeMKTU01/Dkt 6tvcVpdM7WHMKyFIsPdcqTR+CX06+q6IwSkLu+Cnr0gBDtfQpGjk+HSAXT1d46iTzL +umMXte+j4ngj+/Rny9Pn3IIUOpiMuWz6jepBmWE= From: David Gibson To: passt-dev@passt.top, Stefano Brivio Subject: [PATCH v2 14/32] tcp: Separate helpers to create ns listening sockets Date: Thu, 17 Nov 2022 16:58:50 +1100 Message-Id: <20221117055908.2782981-15-david@gibson.dropbear.id.au> X-Mailer: git-send-email 2.38.1 In-Reply-To: <20221117055908.2782981-1-david@gibson.dropbear.id.au> References: <20221117055908.2782981-1-david@gibson.dropbear.id.au> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit Message-ID-Hash: P7FAFLNQ6SQUWQTI42E7YZ2EVXLO7665 X-Message-ID-Hash: P7FAFLNQ6SQUWQTI42E7YZ2EVXLO7665 X-MailFrom: dgibson@gandalf.ozlabs.org X-Mailman-Rule-Misses: dmarc-mitigation; no-senders; approved; emergency; loop; banned-address; member-moderation; nonmember-moderation; administrivia; implicit-dest; max-recipients; max-size; news-moderation; no-subject; digests; suspicious-header CC: David Gibson X-Mailman-Version: 3.3.3 Precedence: list List-Id: Development discussion and patches for passt Archived-At: Archived-At: List-Archive: List-Archive: List-Help: List-Owner: List-Post: List-Subscribe: List-Unsubscribe: tcp_sock_init*() can create either sockets listening on the host, or in the pasta network namespace (with @ns==1). There are, however, a number of differences in how these two cases work in practice though. "ns" sockets are only used in pasta mode, and they always lead to spliced connections only. The functions are also only ever called in "ns" mode with a NULL address and interface name, and it doesn't really make sense for them to be called any other way. Later changes will introduce further differences in behaviour between these two cases, so it makes more sense to use separate functions for creating the ns listening sockets than the regular external/host listening sockets. --- conf.c | 6 +-- tcp.c | 132 ++++++++++++++++++++++++++++++++++++++------------------- tcp.h | 4 +- 3 files changed, 93 insertions(+), 49 deletions(-) diff --git a/conf.c b/conf.c index b07d661..4721c97 100644 --- a/conf.c +++ b/conf.c @@ -209,7 +209,7 @@ static int conf_ports(const struct ctx *c, char optname, const char *optarg, for (i = 0; i < PORT_EPHEMERAL_MIN; i++) { if (optname == 't') - tcp_sock_init(c, 0, AF_UNSPEC, NULL, NULL, i); + tcp_sock_init(c, AF_UNSPEC, NULL, NULL, i); else if (optname == 'u') udp_sock_init(c, 0, AF_UNSPEC, NULL, NULL, i); } @@ -287,7 +287,7 @@ static int conf_ports(const struct ctx *c, char optname, const char *optarg, bitmap_set(fwd->map, i); if (optname == 't') - tcp_sock_init(c, 0, af, addr, ifname, i); + tcp_sock_init(c, af, addr, ifname, i); else if (optname == 'u') udp_sock_init(c, 0, af, addr, ifname, i); } @@ -333,7 +333,7 @@ static int conf_ports(const struct ctx *c, char optname, const char *optarg, fwd->delta[i] = mapped_range.first - orig_range.first; if (optname == 't') - tcp_sock_init(c, 0, af, addr, ifname, i); + tcp_sock_init(c, af, addr, ifname, i); else if (optname == 'u') udp_sock_init(c, 0, af, addr, ifname, i); } diff --git a/tcp.c b/tcp.c index 35fca31..306f928 100644 --- a/tcp.c +++ b/tcp.c @@ -2987,15 +2987,15 @@ void tcp_sock_handler(struct ctx *c, union epoll_ref ref, uint32_t events, /** * tcp_sock_init4() - Initialise listening sockets for a given IPv4 port * @c: Execution context - * @ns: In pasta mode, if set, bind with loopback address in namespace * @addr: Pointer to address for binding, NULL if not configured * @ifname: Name of interface to bind to, NULL if not configured * @port: Port, host order */ -static void tcp_sock_init4(const struct ctx *c, int ns, const struct in_addr *addr, +static void tcp_sock_init4(const struct ctx *c, const struct in_addr *addr, const char *ifname, in_port_t port) { - union tcp_epoll_ref tref = { .tcp.listen = 1, .tcp.outbound = ns }; + in_port_t idx = port + c->tcp.fwd_in.delta[port]; + union tcp_epoll_ref tref = { .tcp.listen = 1, .tcp.index = idx }; bool spliced = false, tap = true; int s; @@ -3006,14 +3006,9 @@ static void tcp_sock_init4(const struct ctx *c, int ns, const struct in_addr *ad if (!addr) addr = &c->ip4.addr; - tap = !ns && !IN4_IS_ADDR_LOOPBACK(addr); + tap = !IN4_IS_ADDR_LOOPBACK(addr); } - if (ns) - tref.tcp.index = (in_port_t)(port + c->tcp.fwd_out.delta[port]); - else - tref.tcp.index = (in_port_t)(port + c->tcp.fwd_in.delta[port]); - if (tap) { s = sock_l4(c, AF_INET, IPPROTO_TCP, addr, ifname, port, tref.u32); @@ -3039,29 +3034,25 @@ static void tcp_sock_init4(const struct ctx *c, int ns, const struct in_addr *ad else s = -1; - if (c->tcp.fwd_out.mode == FWD_AUTO) { - if (ns) - tcp_sock_ns[port][V4] = s; - else - tcp_sock_init_lo[port][V4] = s; - } + if (c->tcp.fwd_out.mode == FWD_AUTO) + tcp_sock_init_lo[port][V4] = s; } } /** * tcp_sock_init6() - Initialise listening sockets for a given IPv6 port * @c: Execution context - * @ns: In pasta mode, if set, bind with loopback address in namespace * @addr: Pointer to address for binding, NULL if not configured * @ifname: Name of interface to bind to, NULL if not configured * @port: Port, host order */ -static void tcp_sock_init6(const struct ctx *c, int ns, +static void tcp_sock_init6(const struct ctx *c, const struct in6_addr *addr, const char *ifname, in_port_t port) { - union tcp_epoll_ref tref = { .tcp.listen = 1, .tcp.outbound = ns, - .tcp.v6 = 1 }; + in_port_t idx = port + c->tcp.fwd_in.delta[port]; + union tcp_epoll_ref tref = { .tcp.listen = 1, .tcp.v6 = 1, + .tcp.index = idx }; bool spliced = false, tap = true; int s; @@ -3073,14 +3064,9 @@ static void tcp_sock_init6(const struct ctx *c, int ns, if (!addr) addr = &c->ip6.addr; - tap = !ns && !IN6_IS_ADDR_LOOPBACK(addr); + tap = !IN6_IS_ADDR_LOOPBACK(addr); } - if (ns) - tref.tcp.index = (in_port_t)(port + c->tcp.fwd_out.delta[port]); - else - tref.tcp.index = (in_port_t)(port + c->tcp.fwd_in.delta[port]); - if (tap) { s = sock_l4(c, AF_INET6, IPPROTO_TCP, addr, ifname, port, tref.u32); @@ -3105,40 +3091,99 @@ static void tcp_sock_init6(const struct ctx *c, int ns, else s = -1; - if (c->tcp.fwd_out.mode == FWD_AUTO) { - if (ns) - tcp_sock_ns[port][V6] = s; - else - tcp_sock_init_lo[port][V6] = s; - } + if (c->tcp.fwd_out.mode == FWD_AUTO) + tcp_sock_init_lo[port][V6] = s; } } /** - * tcp_sock_init() - Initialise listening sockets for a given port + * tcp_sock_init() - Create listening sockets for a given host ("inbound") port * @c: Execution context - * @ns: In pasta mode, if set, bind with loopback address in namespace * @af: Address family to select a specific IP version, or AF_UNSPEC * @addr: Pointer to address for binding, NULL if not configured * @ifname: Name of interface to bind to, NULL if not configured * @port: Port, host order */ -void tcp_sock_init(const struct ctx *c, int ns, sa_family_t af, - const void *addr, const char *ifname, in_port_t port) +void tcp_sock_init(const struct ctx *c, sa_family_t af, const void *addr, + const char *ifname, in_port_t port) { if ((af == AF_INET || af == AF_UNSPEC) && c->ifi4) - tcp_sock_init4(c, ns, addr, ifname, port); + tcp_sock_init4(c, addr, ifname, port); if ((af == AF_INET6 || af == AF_UNSPEC) && c->ifi6) - tcp_sock_init6(c, ns, addr, ifname, port); + tcp_sock_init6(c, addr, ifname, port); +} + +/** + * tcp_ns_sock_init4() - Init socket to listen for outbound IPv4 connections + * @c: Execution context + * @port: Port, host order + */ +static void tcp_ns_sock_init4(const struct ctx *c, in_port_t port) +{ + in_port_t idx = port + c->tcp.fwd_out.delta[port]; + union tcp_epoll_ref tref = { .tcp.listen = 1, .tcp.outbound = 1, + .tcp.splice = 1, .tcp.index = idx }; + struct in_addr loopback = { htonl(INADDR_LOOPBACK) }; + int s; + + assert(c->mode == MODE_PASTA); + + s = sock_l4(c, AF_INET, IPPROTO_TCP, &loopback, NULL, port, tref.u32); + if (s >= 0) + tcp_sock_set_bufsize(c, s); + else + s = -1; + + if (c->tcp.fwd_out.mode == FWD_AUTO) + tcp_sock_ns[port][V4] = s; } /** - * tcp_sock_init_ns() - Bind sockets in namespace for outbound connections + * tcp_ns_sock_init6() - Init socket to listen for outbound IPv6 connections + * @c: Execution context + * @port: Port, host order + */ +static void tcp_ns_sock_init6(const struct ctx *c, in_port_t port) +{ + in_port_t idx = port + c->tcp.fwd_out.delta[port]; + union tcp_epoll_ref tref = { .tcp.listen = 1, .tcp.outbound = 1, + .tcp.splice = 1, .tcp.v6 = 1, + .tcp.index = idx }; + int s; + + assert(c->mode == MODE_PASTA); + + s = sock_l4(c, AF_INET6, IPPROTO_TCP, &in6addr_loopback, NULL, port, + tref.u32); + if (s >= 0) + tcp_sock_set_bufsize(c, s); + else + s = -1; + + if (c->tcp.fwd_out.mode == FWD_AUTO) + tcp_sock_ns[port][V6] = s; +} + +/** + * tcp_ns_sock_init() - Init socket to listen for spliced outbound connections + * @c: Execution context + * @port: Port, host order + */ +void tcp_ns_sock_init(const struct ctx *c, in_port_t port) +{ + if (c->ifi4) + tcp_ns_sock_init4(c, port); + if (c->ifi6) + tcp_ns_sock_init6(c, port); +} + +/** + * tcp_ns_socks_init() - Bind sockets in namespace for outbound connections * @arg: Execution context * * Return: 0 */ -static int tcp_sock_init_ns(void *arg) +static int tcp_ns_socks_init(void *arg) { struct ctx *c = (struct ctx *)arg; unsigned port; @@ -3149,7 +3194,7 @@ static int tcp_sock_init_ns(void *arg) if (!bitmap_isset(c->tcp.fwd_out.map, port)) continue; - tcp_sock_init(c, 1, AF_UNSPEC, NULL, NULL, port); + tcp_ns_sock_init(c, port); } return 0; @@ -3279,7 +3324,7 @@ int tcp_init(struct ctx *c) if (c->mode == MODE_PASTA) { tcp_splice_init(c); - NS_CALL(tcp_sock_init_ns, c); + NS_CALL(tcp_ns_socks_init, c); refill_arg.ns = 1; NS_CALL(tcp_sock_refill, &refill_arg); @@ -3364,8 +3409,7 @@ static int tcp_port_rebind(void *arg) if ((a->c->ifi4 && tcp_sock_ns[port][V4] == -1) || (a->c->ifi6 && tcp_sock_ns[port][V6] == -1)) - tcp_sock_init(a->c, 1, AF_UNSPEC, NULL, NULL, - port); + tcp_ns_sock_init(a->c, port); } } else { for (port = 0; port < NUM_PORTS; port++) { @@ -3398,7 +3442,7 @@ static int tcp_port_rebind(void *arg) if ((a->c->ifi4 && tcp_sock_init_ext[port][V4] == -1) || (a->c->ifi6 && tcp_sock_init_ext[port][V6] == -1)) - tcp_sock_init(a->c, 0, AF_UNSPEC, NULL, NULL, + tcp_sock_init(a->c, AF_UNSPEC, NULL, NULL, port); } } diff --git a/tcp.h b/tcp.h index 49738ef..f4ed298 100644 --- a/tcp.h +++ b/tcp.h @@ -19,8 +19,8 @@ void tcp_sock_handler(struct ctx *c, union epoll_ref ref, uint32_t events, const struct timespec *now); int tcp_tap_handler(struct ctx *c, int af, const void *addr, const struct pool *p, const struct timespec *now); -void tcp_sock_init(const struct ctx *c, int ns, sa_family_t af, - const void *addr, const char *ifname, in_port_t port); +void tcp_sock_init(const struct ctx *c, sa_family_t af, const void *addr, + const char *ifname, in_port_t port); int tcp_init(struct ctx *c); void tcp_timer(struct ctx *c, const struct timespec *ts); void tcp_defer_handler(struct ctx *c); -- 2.38.1