From mboxrd@z Thu Jan 1 00:00:00 1970 Received: by passt.top (Postfix, from userid 1000) id EB8425A0267; Tue, 11 Oct 2022 01:32:20 +0200 (CEST) From: Stefano Brivio To: passt-dev@passt.top Subject: [PATCH v3] conf, tcp, udp: Allow specification of interface to bind to Date: Tue, 11 Oct 2022 01:32:20 +0200 Message-Id: <20221010233220.1198263-1-sbrivio@redhat.com> X-Mailer: git-send-email 2.35.1 MIME-Version: 1.0 Content-Transfer-Encoding: 8bit Message-ID-Hash: GTSCIM6MZ47Q44APJRNRCCRDGTCODRCS X-Message-ID-Hash: GTSCIM6MZ47Q44APJRNRCCRDGTCODRCS X-MailFrom: sbrivio@passt.top X-Mailman-Rule-Misses: dmarc-mitigation; no-senders; approved; emergency; loop; banned-address; member-moderation; nonmember-moderation; administrivia; implicit-dest; max-recipients; max-size; news-moderation; no-subject; digests; suspicious-header X-Mailman-Version: 3.3.3 Precedence: list List-Id: Development discussion and patches for passt Archived-At: <> Archived-At: List-Archive: <> List-Archive: List-Help: List-Owner: List-Post: List-Subscribe: List-Unsubscribe: Since kernel version 5.7, commit c427bfec18f2 ("net: core: enable SO_BINDTODEVICE for non-root users"), we can bind sockets to interfaces, if they haven't been bound yet (as in bind()). Introduce an optional interface specification for forwarded ports, prefixed by %, that can be passed together with an address. Reported use case: running local services that use ports we want to have externally forwarded: https://github.com/containers/podman/issues/14425 Signed-off-by: Stefano Brivio --- v3: - escape % characters in usage() formatting v2: - fix check on interface name length (spec - ifname, not ifname - buf) conf.c | 31 +++++++++++++++++++++---------- icmp.c | 4 ++-- passt.1 | 12 ++++++++++-- tcp.c | 27 +++++++++++++++------------ tcp.h | 2 +- udp.c | 35 ++++++++++++++++++----------------- udp.h | 2 +- util.c | 19 ++++++++++++++++++- util.h | 3 ++- 9 files changed, 88 insertions(+), 47 deletions(-) diff --git a/conf.c b/conf.c index 779371f..93ca0cd 100644 --- a/conf.c +++ b/conf.c @@ -180,8 +180,8 @@ static int conf_ports(const struct ctx *c, char optname, const char *optarg, struct port_fwd *fwd) { char addr_buf[sizeof(struct in6_addr)] = { 0 }, *addr = addr_buf; + char buf[BUFSIZ], *spec, *ifname = NULL, *p; uint8_t exclude[PORT_BITMAP_SIZE] = { 0 }; - char buf[BUFSIZ], *spec, *p; sa_family_t af = AF_UNSPEC; bool exclude_only = true; @@ -209,9 +209,9 @@ static int conf_ports(const struct ctx *c, char optname, const char *optarg, for (i = 0; i < PORT_EPHEMERAL_MIN; i++) { if (optname == 't') - tcp_sock_init(c, 0, AF_UNSPEC, NULL, i); + tcp_sock_init(c, 0, AF_UNSPEC, NULL, NULL, i); else if (optname == 'u') - udp_sock_init(c, 0, AF_UNSPEC, NULL, i); + udp_sock_init(c, 0, AF_UNSPEC, NULL, NULL, i); } return 0; @@ -231,6 +231,14 @@ static int conf_ports(const struct ctx *c, char optname, const char *optarg, if (optname != 't' && optname != 'u') goto bad; + if ((ifname = strchr(buf, '%'))) { + if (spec - ifname >= IFNAMSIZ - 1) + goto bad; + + *ifname = 0; + ifname++; + } + if (inet_pton(AF_INET, buf, addr)) af = AF_INET; else if (inet_pton(AF_INET6, buf, addr)) @@ -278,9 +286,9 @@ static int conf_ports(const struct ctx *c, char optname, const char *optarg, bitmap_set(fwd->map, i); if (optname == 't') - tcp_sock_init(c, 0, af, addr, i); + tcp_sock_init(c, 0, af, addr, NULL, i); else if (optname == 'u') - udp_sock_init(c, 0, af, addr, i); + udp_sock_init(c, 0, af, addr, NULL, i); } return 0; @@ -324,9 +332,9 @@ static int conf_ports(const struct ctx *c, char optname, const char *optarg, fwd->delta[i] = mapped_range.first - orig_range.first; if (optname == 't') - tcp_sock_init(c, 0, af, addr, i); + tcp_sock_init(c, 0, af, addr, ifname, i); else if (optname == 'u') - udp_sock_init(c, 0, af, addr, i); + udp_sock_init(c, 0, af, addr, ifname, i); } } while ((p = next_chunk(p, ','))); @@ -720,8 +728,9 @@ static void usage(const char *name) info( " 'all': forward all unbound, non-ephemeral ports"); info( " a comma-separated list, optionally ranged with '-'"); info( " and optional target ports after ':', with optional"); - info( " address specification suffixed by '/'. Ranges can be"); - info( " reduced by excluding ports or ranges prefixed by '~'"); + info( " address specification suffixed by '/' and optional"); + info( " interface prefixed by '%%'. Ranges can be reduced by"); + info( " excluding ports or ranges prefixed by '~'"); info( " Examples:"); info( " -t 22 Forward local port 22 to 22 on guest"); info( " -t 22:23 Forward local port 22 to 23 on guest"); @@ -740,6 +749,7 @@ static void usage(const char *name) exit(EXIT_FAILURE); pasta_opts: + info( " -t, --tcp-ports SPEC TCP port forwarding to namespace"); info( " can be specified multiple times"); info( " SPEC can be:"); @@ -747,7 +757,8 @@ pasta_opts: info( " 'auto': forward all ports currently bound in namespace"); info( " a comma-separated list, optionally ranged with '-'"); info( " and optional target ports after ':', with optional"); - info( " address specification suffixed by '/'. Examples:"); + info( " address specification suffixed by '/' and optional"); + info( " interface prefixed by '%%'. Examples:"); info( " -t 22 Forward local port 22 to port 22 in netns"); info( " -t 22:23 Forward local port 22 to port 23"); info( " -t 22,25 Forward ports 22, 25 to ports 22, 25"); diff --git a/icmp.c b/icmp.c index f02f89f..6bd87fd 100644 --- a/icmp.c +++ b/icmp.c @@ -169,7 +169,7 @@ int icmp_tap_handler(const struct ctx *c, int af, const void *addr, iref.icmp.id = id = ntohs(ih->un.echo.id); if ((s = icmp_id_map[V4][id].sock) <= 0) { - s = sock_l4(c, AF_INET, IPPROTO_ICMP, NULL, id, + s = sock_l4(c, AF_INET, IPPROTO_ICMP, NULL, NULL, id, iref.u32); if (s < 0) goto fail_sock; @@ -207,7 +207,7 @@ int icmp_tap_handler(const struct ctx *c, int af, const void *addr, iref.icmp.id = id = ntohs(ih->icmp6_identifier); if ((s = icmp_id_map[V6][id].sock) <= 0) { - s = sock_l4(c, AF_INET6, IPPROTO_ICMPV6, NULL, id, + s = sock_l4(c, AF_INET6, IPPROTO_ICMPV6, NULL, NULL, id, iref.u32); if (s < 0) goto fail_sock; diff --git a/passt.1 b/passt.1 index 555a50c..7d113f2 100644 --- a/passt.1 +++ b/passt.1 @@ -325,7 +325,8 @@ For low (< 1024) ports, see \fBNOTES\fR. .BR ports A comma-separated list of ports, optionally ranged with \fI-\fR, and, optionally, with target ports after \fI:\fR, if they differ. Specific addresses -can be bound as well, separated by \fI/\fR. Within given ranges, selected ports +can be bound as well, separated by \fI/\fR, and also, since Linux 5.7, limited +to specific interfaces, prefixed by \fI%\fR. Within given ranges, selected ports and ranges can be excluded by an additional specification prefixed by \fI~\fR. Specifying excluded ranges only implies that all other ports are forwarded. Examples: @@ -349,6 +350,9 @@ Forward local ports 22 to 80 to corresponding ports on the guest plus 10 -t 192.0.2.1/22 Forward local port 22, bound to 192.0.2.1, to port 22 on the guest .TP +-t 192.0.2.1%eth0/22 +Forward local port 22, bound to 192.0.2.1 and interface eth0, to port 22 +.TP -t 2000-5000,~3000-3010 Forward local ports 2000 to 5000, but not 3000 to 3010 .TP @@ -399,7 +403,8 @@ periodically derived (every second) from listening sockets reported by .BR ports A comma-separated list of ports, optionally ranged with \fI-\fR, and, optionally, with target ports after \fI:\fR, if they differ. Specific addresses -can be bound as well, separated by \fI/\fR. Within given ranges, selected ports +can be bound as well, separated by \fI/\fR, and also, since Linux 5.7, limited +to specific interfaces, prefixed by \fI%\fR. Within given ranges, selected ports and ranges can be excluded by an additional specification prefixed by \fI~\fR. Specifying excluded ranges only implies that all other ports are forwarded. Examples: @@ -424,6 +429,9 @@ namespace -t 192.0.2.1/22 Forward local port 22, bound to 192.0.2.1, to port 22 in the target namespace .TP +-t 192.0.2.1%eth0/22 +Forward local port 22, bound to 192.0.2.1 and interface eth0, to port 22 +.TP -t 2000-5000,~3000-3010 Forward local ports 2000 to 5000, but not 3000 to 3010 .TP diff --git a/tcp.c b/tcp.c index 830dc88..7e82589 100644 --- a/tcp.c +++ b/tcp.c @@ -3078,10 +3078,11 @@ void tcp_sock_handler(struct ctx *c, union epoll_ref ref, uint32_t events, * @ns: In pasta mode, if set, bind with loopback address in namespace * @af: Address family to select a specific IP version, or AF_UNSPEC * @addr: Pointer to address for binding, NULL if not configured + * @ifname: Name of interface to bind to, NULL if not configured * @port: Port, host order */ void tcp_sock_init(const struct ctx *c, int ns, sa_family_t af, - const void *addr, in_port_t port) + const void *addr, const char *ifname, in_port_t port) { union tcp_epoll_ref tref = { .tcp.listen = 1 }; const void *bind_addr; @@ -3103,8 +3104,8 @@ void tcp_sock_init(const struct ctx *c, int ns, sa_family_t af, tref.tcp.splice = 0; if (!ns) { - s = sock_l4(c, AF_INET, IPPROTO_TCP, bind_addr, port, - tref.u32); + s = sock_l4(c, AF_INET, IPPROTO_TCP, bind_addr, ifname, + port, tref.u32); if (s >= 0) tcp_sock_set_bufsize(c, s); else @@ -3118,8 +3119,8 @@ void tcp_sock_init(const struct ctx *c, int ns, sa_family_t af, bind_addr = &(uint32_t){ htonl(INADDR_LOOPBACK) }; tref.tcp.splice = 1; - s = sock_l4(c, AF_INET, IPPROTO_TCP, bind_addr, port, - tref.u32); + s = sock_l4(c, AF_INET, IPPROTO_TCP, bind_addr, ifname, + port, tref.u32); if (s >= 0) tcp_sock_set_bufsize(c, s); else @@ -3144,8 +3145,8 @@ void tcp_sock_init(const struct ctx *c, int ns, sa_family_t af, tref.tcp.splice = 0; if (!ns) { - s = sock_l4(c, AF_INET6, IPPROTO_TCP, bind_addr, port, - tref.u32); + s = sock_l4(c, AF_INET6, IPPROTO_TCP, bind_addr, ifname, + port, tref.u32); if (s >= 0) tcp_sock_set_bufsize(c, s); else @@ -3159,8 +3160,8 @@ void tcp_sock_init(const struct ctx *c, int ns, sa_family_t af, bind_addr = &in6addr_loopback; tref.tcp.splice = 1; - s = sock_l4(c, AF_INET6, IPPROTO_TCP, bind_addr, port, - tref.u32); + s = sock_l4(c, AF_INET6, IPPROTO_TCP, bind_addr, ifname, + port, tref.u32); if (s >= 0) tcp_sock_set_bufsize(c, s); else @@ -3193,7 +3194,7 @@ static int tcp_sock_init_ns(void *arg) if (!bitmap_isset(c->tcp.fwd_out.map, port)) continue; - tcp_sock_init(c, 1, AF_UNSPEC, NULL, port); + tcp_sock_init(c, 1, AF_UNSPEC, NULL, NULL, port); } return 0; @@ -3410,7 +3411,8 @@ static int tcp_port_rebind(void *arg) if ((a->c->ifi4 && tcp_sock_ns[port][V4] == -1) || (a->c->ifi6 && tcp_sock_ns[port][V6] == -1)) - tcp_sock_init(a->c, 1, AF_UNSPEC, NULL, port); + tcp_sock_init(a->c, 1, AF_UNSPEC, NULL, NULL, + port); } } else { for (port = 0; port < NUM_PORTS; port++) { @@ -3443,7 +3445,8 @@ static int tcp_port_rebind(void *arg) if ((a->c->ifi4 && tcp_sock_init_ext[port][V4] == -1) || (a->c->ifi6 && tcp_sock_init_ext[port][V6] == -1)) - tcp_sock_init(a->c, 0, AF_UNSPEC, NULL, port); + tcp_sock_init(a->c, 0, AF_UNSPEC, NULL, NULL, + port); } } diff --git a/tcp.h b/tcp.h index 2548d4d..7ba7ab7 100644 --- a/tcp.h +++ b/tcp.h @@ -21,7 +21,7 @@ void tcp_sock_handler(struct ctx *c, union epoll_ref ref, uint32_t events, int tcp_tap_handler(struct ctx *c, int af, const void *addr, const struct pool *p, const struct timespec *now); void tcp_sock_init(const struct ctx *c, int ns, sa_family_t af, - const void *addr, in_port_t port); + const void *addr, const char *ifname, in_port_t port); int tcp_init(struct ctx *c); void tcp_timer(struct ctx *c, const struct timespec *ts); void tcp_defer_handler(struct ctx *c); diff --git a/udp.c b/udp.c index 5422fdd..cac9c65 100644 --- a/udp.c +++ b/udp.c @@ -1005,7 +1005,7 @@ int udp_tap_handler(struct ctx *c, int af, const void *addr, union udp_epoll_ref uref = { .udp.bound = 1, .udp.port = src }; - s = sock_l4(c, AF_INET, IPPROTO_UDP, NULL, src, + s = sock_l4(c, AF_INET, IPPROTO_UDP, NULL, NULL, src, uref.u32); if (s < 0) return p->count; @@ -1057,8 +1057,8 @@ int udp_tap_handler(struct ctx *c, int af, const void *addr, .udp.v6 = 1, .udp.port = src }; - s = sock_l4(c, AF_INET6, IPPROTO_UDP, bind_addr, src, - uref.u32); + s = sock_l4(c, AF_INET6, IPPROTO_UDP, bind_addr, NULL, + src, uref.u32); if (s < 0) return p->count; @@ -1111,10 +1111,11 @@ int udp_tap_handler(struct ctx *c, int af, const void *addr, * @ns: In pasta mode, if set, bind with loopback address in namespace * @af: Address family to select a specific IP version, or AF_UNSPEC * @addr: Pointer to address for binding, NULL if not configured + * @ifname: Name of interface to bind to, NULL if not configured * @port: Port, host order */ void udp_sock_init(const struct ctx *c, int ns, sa_family_t af, - const void *addr, in_port_t port) + const void *addr, const char *ifname, in_port_t port) { union udp_epoll_ref uref = { .udp.bound = 1 }; const void *bind_addr; @@ -1138,8 +1139,8 @@ void udp_sock_init(const struct ctx *c, int ns, sa_family_t af, if (!ns) { uref.udp.splice = 0; - s = sock_l4(c, AF_INET, IPPROTO_UDP, bind_addr, port, - uref.u32); + s = sock_l4(c, AF_INET, IPPROTO_UDP, bind_addr, ifname, + port, uref.u32); udp_tap_map[V4][uref.udp.port].sock = s; } @@ -1148,16 +1149,16 @@ void udp_sock_init(const struct ctx *c, int ns, sa_family_t af, bind_addr = &(uint32_t){ htonl(INADDR_LOOPBACK) }; uref.udp.splice = UDP_TO_NS; - sock_l4(c, AF_INET, IPPROTO_UDP, bind_addr, port, - uref.u32); + sock_l4(c, AF_INET, IPPROTO_UDP, bind_addr, ifname, + port, uref.u32); } if (ns) { bind_addr = &(uint32_t){ htonl(INADDR_LOOPBACK) }; uref.udp.splice = UDP_TO_INIT; - sock_l4(c, AF_INET, IPPROTO_UDP, bind_addr, port, - uref.u32); + sock_l4(c, AF_INET, IPPROTO_UDP, bind_addr, ifname, + port, uref.u32); } } @@ -1171,8 +1172,8 @@ void udp_sock_init(const struct ctx *c, int ns, sa_family_t af, if (!ns) { uref.udp.splice = 0; - s = sock_l4(c, AF_INET6, IPPROTO_UDP, bind_addr, port, - uref.u32); + s = sock_l4(c, AF_INET6, IPPROTO_UDP, bind_addr, ifname, + port, uref.u32); udp_tap_map[V6][uref.udp.port].sock = s; } @@ -1181,16 +1182,16 @@ void udp_sock_init(const struct ctx *c, int ns, sa_family_t af, bind_addr = &in6addr_loopback; uref.udp.splice = UDP_TO_NS; - sock_l4(c, AF_INET6, IPPROTO_UDP, bind_addr, port, - uref.u32); + sock_l4(c, AF_INET6, IPPROTO_UDP, bind_addr, ifname, + port, uref.u32); } if (ns) { bind_addr = &in6addr_loopback; uref.udp.splice = UDP_TO_INIT; - sock_l4(c, AF_INET6, IPPROTO_UDP, bind_addr, port, - uref.u32); + sock_l4(c, AF_INET6, IPPROTO_UDP, bind_addr, ifname, + port, uref.u32); } } } @@ -1213,7 +1214,7 @@ int udp_sock_init_ns(void *arg) if (!bitmap_isset(c->udp.fwd_out.f.map, dst)) continue; - udp_sock_init(c, 1, AF_UNSPEC, NULL, dst); + udp_sock_init(c, 1, AF_UNSPEC, NULL, NULL, dst); } return 0; diff --git a/udp.h b/udp.h index d14df0a..b4ee8b7 100644 --- a/udp.h +++ b/udp.h @@ -13,7 +13,7 @@ void udp_sock_handler(const struct ctx *c, union epoll_ref ref, uint32_t events, int udp_tap_handler(struct ctx *c, int af, const void *addr, const struct pool *p, const struct timespec *now); void udp_sock_init(const struct ctx *c, int ns, sa_family_t af, - const void *addr, in_port_t port); + const void *addr, const char *ifname, in_port_t port); int udp_init(struct ctx *c); void udp_timer(struct ctx *c, const struct timespec *ts); void udp_update_l2_buf(const unsigned char *eth_d, const unsigned char *eth_s, diff --git a/util.c b/util.c index 5b1e08a..76057be 100644 --- a/util.c +++ b/util.c @@ -90,13 +90,15 @@ found: * @af: Address family, AF_INET or AF_INET6 * @proto: Protocol number * @bind_addr: Address for binding, NULL for any + * @ifname: Interface for binding, NULL for any * @port: Port, host order * @data: epoll reference portion for protocol handlers * * Return: newly created socket, -1 on error */ int sock_l4(const struct ctx *c, int af, uint8_t proto, - const void *bind_addr, uint16_t port, uint32_t data) + const void *bind_addr, const char *ifname, uint16_t port, + uint32_t data) { union epoll_ref ref = { .r.proto = proto, .r.p.data = data }; struct sockaddr_in addr4 = { @@ -163,6 +165,21 @@ int sock_l4(const struct ctx *c, int af, uint8_t proto, if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &y, sizeof(y))) debug("Failed to set SO_REUSEADDR on socket %i", fd); + if (ifname) { + /* Supported since kernel version 5.7, commit c427bfec18f2 + * ("net: core: enable SO_BINDTODEVICE for non-root users"). If + * it's unsupported, don't bind the socket at all, because the + * user might rely on this to filter incoming connections. + */ + if (setsockopt(fd, SOL_SOCKET, SO_BINDTODEVICE, + ifname, strlen(ifname))) { + warn("Can't bind socket for %s port %u to %s, closing", + ip_proto_str[proto], port, ifname); + close(fd); + return -1; + } + } + if (bind(fd, sa, sl) < 0) { /* We'll fail to bind to low ports if we don't have enough * capabilities, and we'll fail to bind on already bound ports, diff --git a/util.h b/util.h index f9a8ec6..7dc3d18 100644 --- a/util.h +++ b/util.h @@ -194,7 +194,8 @@ __attribute__ ((weak)) int ffsl(long int i) { return __builtin_ffsl(i); } char *ipv6_l4hdr(const struct pool *p, int index, size_t offset, uint8_t *proto, size_t *dlen); int sock_l4(const struct ctx *c, int af, uint8_t proto, - const void *bind_addr, uint16_t port, uint32_t data); + const void *bind_addr, const char *ifname, uint16_t port, + uint32_t data); void sock_probe_mem(struct ctx *c); int timespec_diff_ms(const struct timespec *a, const struct timespec *b); void bitmap_set(uint8_t *map, int bit); -- 2.35.1