From mboxrd@z Thu Jan 1 00:00:00 1970 Received: by passt.top (Postfix, from userid 1000) id 489565A026B; Wed, 12 Oct 2022 17:45:54 +0200 (CEST) From: Stefano Brivio To: passt-dev@passt.top Subject: [PATCH v2 3/3] tcp: Don't create 'tap' socket for ports that are bound to loopback only Date: Wed, 12 Oct 2022 17:45:54 +0200 Message-Id: <20221012154554.1650667-4-sbrivio@redhat.com> X-Mailer: git-send-email 2.35.1 In-Reply-To: <20221012154554.1650667-1-sbrivio@redhat.com> References: <20221012154554.1650667-1-sbrivio@redhat.com> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit Message-ID-Hash: W4WCUKD2IOWBSAJD5IOJFWXFJPVNMWHA X-Message-ID-Hash: W4WCUKD2IOWBSAJD5IOJFWXFJPVNMWHA X-MailFrom: sbrivio@passt.top X-Mailman-Rule-Misses: dmarc-mitigation; no-senders; approved; emergency; loop; banned-address; member-moderation; nonmember-moderation; administrivia; implicit-dest; max-recipients; max-size; news-moderation; no-subject; digests; suspicious-header CC: David Gibson X-Mailman-Version: 3.3.3 Precedence: list List-Id: Development discussion and patches for passt Archived-At: <> Archived-At: List-Archive: <> List-Archive: List-Help: List-Owner: List-Post: List-Subscribe: List-Unsubscribe: If the user specifies an explicit loopback address for a port binding, we're going to use that address for the 'tap' socket, and the same exact address for the 'spliced' socket (because those are, by definition, only bound to loopback addresses). This means that the second binding will fail, and, unexpectedly, the port is forwarded, but via tap device, which means the source address in the namespace won't be a loopback address. Make it explicit under which conditions we're creating which kind of socket, by refactoring tcp_sock_init() into two separate functions for IPv4 and IPv6 and gathering those conditions at the beginning. Also, don't create spliced sockets if the user specifies explicitly a non-loopback address, those are harmless but not desired either. Fixes: 3c6ae625101a ("conf, tcp, udp: Allow address specification for forwarded ports") Signed-off-by: Stefano Brivio --- v2: - use new IPV4_IS_LOOPBACK macro in tcp_conn_from_sock() too tcp.c | 183 +++++++++++++++++++++++++++++++++++---------------------- util.h | 3 + 2 files changed, 117 insertions(+), 69 deletions(-) diff --git a/tcp.c b/tcp.c index 0d4ce57..e6979a9 100644 --- a/tcp.c +++ b/tcp.c @@ -275,6 +275,7 @@ #include #include #include +#include #include #include #include @@ -2906,8 +2907,8 @@ static void tcp_conn_from_sock(struct ctx *c, union epoll_ref ref, memset(&conn->a.a4.zero, 0, sizeof(conn->a.a4.zero)); memset(&conn->a.a4.one, 0xff, sizeof(conn->a.a4.one)); - if (s_addr >> IN_CLASSA_NSHIFT == IN_LOOPBACKNET || - s_addr == INADDR_ANY || htonl(s_addr) == c->ip4.addr_seen) + if (IPV4_IS_LOOPBACK(s_addr) || s_addr == INADDR_ANY || + htonl(s_addr) == c->ip4.addr_seen) s_addr = ntohl(c->ip4.gw); s_addr = htonl(s_addr); @@ -3073,109 +3074,153 @@ void tcp_sock_handler(struct ctx *c, union epoll_ref ref, uint32_t events, } /** - * tcp_sock_init() - Initialise listening sockets for a given port + * tcp_sock_init4() - Initialise listening sockets for a given IPv4 port * @c: Execution context * @ns: In pasta mode, if set, bind with loopback address in namespace - * @af: Address family to select a specific IP version, or AF_UNSPEC * @addr: Pointer to address for binding, NULL if not configured * @ifname: Name of interface to bind to, NULL if not configured * @port: Port, host order */ -void tcp_sock_init(const struct ctx *c, int ns, sa_family_t af, - const void *addr, const char *ifname, in_port_t port) +static void tcp_sock_init4(const struct ctx *c, int ns, const in_addr_t *addr, + const char *ifname, in_port_t port) { union tcp_epoll_ref tref = { .tcp.listen = 1, .tcp.outbound = ns }; - const void *bind_addr; + bool spliced = false, tap = true; int s; + if (c->mode == MODE_PASTA) { + spliced = !addr || + ntohl(*addr) == INADDR_ANY || + IPV4_IS_LOOPBACK(ntohl(*addr)); + + if (!addr) + addr = &c->ip4.addr; + + tap = !ns && !IPV4_IS_LOOPBACK(ntohl(*addr)); + } + if (ns) tref.tcp.index = (in_port_t)(port + c->tcp.fwd_out.delta[port]); else tref.tcp.index = (in_port_t)(port + c->tcp.fwd_in.delta[port]); - if (af == AF_INET || af == AF_UNSPEC) { - if (!addr && c->mode == MODE_PASTA) - bind_addr = &c->ip4.addr; + if (tap) { + s = sock_l4(c, AF_INET, IPPROTO_TCP, addr, ifname, port, + tref.u32); + if (s >= 0) + tcp_sock_set_bufsize(c, s); else - bind_addr = addr; + s = -1; - tref.tcp.v6 = 0; - tref.tcp.splice = 0; + if (c->tcp.fwd_in.mode == FWD_AUTO) + tcp_sock_init_ext[port][V4] = s; + } - if (!ns) { - s = sock_l4(c, AF_INET, IPPROTO_TCP, bind_addr, ifname, - port, tref.u32); - if (s >= 0) - tcp_sock_set_bufsize(c, s); - else - s = -1; + if (spliced) { + tref.tcp.splice = 1; - if (c->tcp.fwd_in.mode == FWD_AUTO) - tcp_sock_init_ext[port][V4] = s; - } + addr = &(uint32_t){ htonl(INADDR_LOOPBACK) }; - if (c->mode == MODE_PASTA) { - bind_addr = &(uint32_t){ htonl(INADDR_LOOPBACK) }; + s = sock_l4(c, AF_INET, IPPROTO_TCP, addr, ifname, port, + tref.u32); + if (s >= 0) + tcp_sock_set_bufsize(c, s); + else + s = -1; - tref.tcp.splice = 1; - s = sock_l4(c, AF_INET, IPPROTO_TCP, bind_addr, ifname, - port, tref.u32); - if (s >= 0) - tcp_sock_set_bufsize(c, s); + if (c->tcp.fwd_out.mode == FWD_AUTO) { + if (ns) + tcp_sock_ns[port][V4] = s; else - s = -1; - - if (c->tcp.fwd_out.mode == FWD_AUTO) { - if (ns) - tcp_sock_ns[port][V4] = s; - else - tcp_sock_init_lo[port][V4] = s; - } + tcp_sock_init_lo[port][V4] = s; } } +} + +/** + * tcp_sock_init6() - Initialise listening sockets for a given IPv6 port + * @c: Execution context + * @ns: In pasta mode, if set, bind with loopback address in namespace + * @addr: Pointer to address for binding, NULL if not configured + * @ifname: Name of interface to bind to, NULL if not configured + * @port: Port, host order + */ +static void tcp_sock_init6(const struct ctx *c, int ns, + const struct in6_addr *addr, const char *ifname, + in_port_t port) +{ + union tcp_epoll_ref tref = { .tcp.listen = 1, .tcp.outbound = ns, + .tcp.v6 = 1 }; + bool spliced = false, tap = true; + int s; + + if (c->mode == MODE_PASTA) { + spliced = !addr || + IN6_IS_ADDR_UNSPECIFIED(addr) || + IN6_IS_ADDR_LOOPBACK(addr); + + if (!addr) + addr = &c->ip6.addr; + + tap = !ns && !IN6_IS_ADDR_LOOPBACK(addr); + } + + if (ns) + tref.tcp.index = (in_port_t)(port + c->tcp.fwd_out.delta[port]); + else + tref.tcp.index = (in_port_t)(port + c->tcp.fwd_in.delta[port]); - if (af == AF_INET6 || af == AF_UNSPEC) { - if (!addr && c->mode == MODE_PASTA) - bind_addr = &c->ip6.addr; + if (tap) { + s = sock_l4(c, AF_INET6, IPPROTO_TCP, addr, ifname, port, + tref.u32); + if (s >= 0) + tcp_sock_set_bufsize(c, s); else - bind_addr = addr; + s = -1; - tref.tcp.v6 = 1; + if (c->tcp.fwd_in.mode == FWD_AUTO) + tcp_sock_init_ext[port][V6] = s; + } - tref.tcp.splice = 0; - if (!ns) { - s = sock_l4(c, AF_INET6, IPPROTO_TCP, bind_addr, ifname, - port, tref.u32); - if (s >= 0) - tcp_sock_set_bufsize(c, s); - else - s = -1; + if (spliced) { + tref.tcp.splice = 1; - if (c->tcp.fwd_in.mode == FWD_AUTO) - tcp_sock_init_ext[port][V6] = s; - } + addr = &in6addr_loopback; - if (c->mode == MODE_PASTA) { - bind_addr = &in6addr_loopback; + s = sock_l4(c, AF_INET6, IPPROTO_TCP, addr, ifname, port, + tref.u32); + if (s >= 0) + tcp_sock_set_bufsize(c, s); + else + s = -1; - tref.tcp.splice = 1; - s = sock_l4(c, AF_INET6, IPPROTO_TCP, bind_addr, ifname, - port, tref.u32); - if (s >= 0) - tcp_sock_set_bufsize(c, s); + if (c->tcp.fwd_out.mode == FWD_AUTO) { + if (ns) + tcp_sock_ns[port][V6] = s; else - s = -1; - - if (c->tcp.fwd_out.mode == FWD_AUTO) { - if (ns) - tcp_sock_ns[port][V6] = s; - else - tcp_sock_init_lo[port][V6] = s; - } + tcp_sock_init_lo[port][V6] = s; } } } +/** + * tcp_sock_init() - Initialise listening sockets for a given port + * @c: Execution context + * @ns: In pasta mode, if set, bind with loopback address in namespace + * @af: Address family to select a specific IP version, or AF_UNSPEC + * @addr: Pointer to address for binding, NULL if not configured + * @ifname: Name of interface to bind to, NULL if not configured + * @port: Port, host order + */ +void tcp_sock_init(const struct ctx *c, int ns, sa_family_t af, + const void *addr, const char *ifname, in_port_t port) +{ + if (af == AF_INET || af == AF_UNSPEC) + tcp_sock_init4(c, ns, addr, ifname, port); + if (af == AF_INET6 || af == AF_UNSPEC) + tcp_sock_init6(c, ns, addr, ifname, port); +} + /** * tcp_sock_init_ns() - Bind sockets in namespace for outbound connections * @arg: Execution context diff --git a/util.h b/util.h index 7dc3d18..b1eadfd 100644 --- a/util.h +++ b/util.h @@ -81,6 +81,9 @@ #define MAC_ZERO ((uint8_t [ETH_ALEN]){ 0 }) #define MAC_IS_ZERO(addr) (!memcmp((addr), MAC_ZERO, ETH_ALEN)) +#define IPV4_IS_LOOPBACK(addr) \ + ((addr) >> IN_CLASSA_NSHIFT == IN_LOOPBACKNET) + #define NS_FN_STACK_SIZE (RLIMIT_STACK_VAL * 1024 / 4) #define NS_CALL(fn, arg) \ do { \ -- 2.35.1