From: Stefano Brivio <sbrivio@redhat.com>
To: passt-dev@passt.top
Subject: [PATCH v3] conf, tcp, udp: Allow specification of interface to bind to
Date: Tue, 11 Oct 2022 01:32:20 +0200 [thread overview]
Message-ID: <20221010233220.1198263-1-sbrivio@redhat.com> (raw)
Since kernel version 5.7, commit c427bfec18f2 ("net: core: enable
SO_BINDTODEVICE for non-root users"), we can bind sockets to
interfaces, if they haven't been bound yet (as in bind()).
Introduce an optional interface specification for forwarded ports,
prefixed by %, that can be passed together with an address.
Reported use case: running local services that use ports we want
to have externally forwarded:
https://github.com/containers/podman/issues/14425
Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
---
v3:
- escape % characters in usage() formatting
v2:
- fix check on interface name length (spec - ifname, not
ifname - buf)
conf.c | 31 +++++++++++++++++++++----------
icmp.c | 4 ++--
passt.1 | 12 ++++++++++--
tcp.c | 27 +++++++++++++++------------
tcp.h | 2 +-
udp.c | 35 ++++++++++++++++++-----------------
udp.h | 2 +-
util.c | 19 ++++++++++++++++++-
util.h | 3 ++-
9 files changed, 88 insertions(+), 47 deletions(-)
diff --git a/conf.c b/conf.c
index 779371f..93ca0cd 100644
--- a/conf.c
+++ b/conf.c
@@ -180,8 +180,8 @@ static int conf_ports(const struct ctx *c, char optname, const char *optarg,
struct port_fwd *fwd)
{
char addr_buf[sizeof(struct in6_addr)] = { 0 }, *addr = addr_buf;
+ char buf[BUFSIZ], *spec, *ifname = NULL, *p;
uint8_t exclude[PORT_BITMAP_SIZE] = { 0 };
- char buf[BUFSIZ], *spec, *p;
sa_family_t af = AF_UNSPEC;
bool exclude_only = true;
@@ -209,9 +209,9 @@ static int conf_ports(const struct ctx *c, char optname, const char *optarg,
for (i = 0; i < PORT_EPHEMERAL_MIN; i++) {
if (optname == 't')
- tcp_sock_init(c, 0, AF_UNSPEC, NULL, i);
+ tcp_sock_init(c, 0, AF_UNSPEC, NULL, NULL, i);
else if (optname == 'u')
- udp_sock_init(c, 0, AF_UNSPEC, NULL, i);
+ udp_sock_init(c, 0, AF_UNSPEC, NULL, NULL, i);
}
return 0;
@@ -231,6 +231,14 @@ static int conf_ports(const struct ctx *c, char optname, const char *optarg,
if (optname != 't' && optname != 'u')
goto bad;
+ if ((ifname = strchr(buf, '%'))) {
+ *ifname = 0;
+ ifname++;
+ /* Name must be non-empty and fit, with NUL, in IFNAMSIZ */
+ if (!*ifname || strlen(ifname) >= IFNAMSIZ)
+ goto bad;
+ }
+
if (inet_pton(AF_INET, buf, addr))
af = AF_INET;
else if (inet_pton(AF_INET6, buf, addr))
@@ -278,9 +286,9 @@ static int conf_ports(const struct ctx *c, char optname, const char *optarg,
bitmap_set(fwd->map, i);
if (optname == 't')
- tcp_sock_init(c, 0, af, addr, i);
+ tcp_sock_init(c, 0, af, addr, ifname, i);
else if (optname == 'u')
- udp_sock_init(c, 0, af, addr, i);
+ udp_sock_init(c, 0, af, addr, ifname, i);
}
return 0;
@@ -324,9 +332,9 @@ static int conf_ports(const struct ctx *c, char optname, const char *optarg,
fwd->delta[i] = mapped_range.first - orig_range.first;
if (optname == 't')
- tcp_sock_init(c, 0, af, addr, i);
+ tcp_sock_init(c, 0, af, addr, ifname, i);
else if (optname == 'u')
- udp_sock_init(c, 0, af, addr, i);
+ udp_sock_init(c, 0, af, addr, ifname, i);
}
} while ((p = next_chunk(p, ',')));
@@ -720,8 +728,9 @@ static void usage(const char *name)
info( " 'all': forward all unbound, non-ephemeral ports");
info( " a comma-separated list, optionally ranged with '-'");
info( " and optional target ports after ':', with optional");
- info( " address specification suffixed by '/'. Ranges can be");
- info( " reduced by excluding ports or ranges prefixed by '~'");
+ info( " address specification suffixed by '/' and optional");
+ info( " interface prefixed by '%%'. Ranges can be reduced by");
+ info( " excluding ports or ranges prefixed by '~'");
info( " Examples:");
info( " -t 22 Forward local port 22 to 22 on guest");
info( " -t 22:23 Forward local port 22 to 23 on guest");
@@ -740,6 +749,7 @@ static void usage(const char *name)
exit(EXIT_FAILURE);
pasta_opts:
+
info( " -t, --tcp-ports SPEC TCP port forwarding to namespace");
info( " can be specified multiple times");
info( " SPEC can be:");
@@ -747,7 +757,8 @@ pasta_opts:
info( " 'auto': forward all ports currently bound in namespace");
info( " a comma-separated list, optionally ranged with '-'");
info( " and optional target ports after ':', with optional");
- info( " address specification suffixed by '/'. Examples:");
+ info( " address specification suffixed by '/' and optional");
+ info( " interface prefixed by '%%'. Examples:");
info( " -t 22 Forward local port 22 to port 22 in netns");
info( " -t 22:23 Forward local port 22 to port 23");
info( " -t 22,25 Forward ports 22, 25 to ports 22, 25");
diff --git a/icmp.c b/icmp.c
index f02f89f..6bd87fd 100644
--- a/icmp.c
+++ b/icmp.c
@@ -169,7 +169,7 @@ int icmp_tap_handler(const struct ctx *c, int af, const void *addr,
iref.icmp.id = id = ntohs(ih->un.echo.id);
if ((s = icmp_id_map[V4][id].sock) <= 0) {
- s = sock_l4(c, AF_INET, IPPROTO_ICMP, NULL, id,
+ s = sock_l4(c, AF_INET, IPPROTO_ICMP, NULL, NULL, id,
iref.u32);
if (s < 0)
goto fail_sock;
@@ -207,7 +207,7 @@ int icmp_tap_handler(const struct ctx *c, int af, const void *addr,
iref.icmp.id = id = ntohs(ih->icmp6_identifier);
if ((s = icmp_id_map[V6][id].sock) <= 0) {
- s = sock_l4(c, AF_INET6, IPPROTO_ICMPV6, NULL, id,
+ s = sock_l4(c, AF_INET6, IPPROTO_ICMPV6, NULL, NULL, id,
iref.u32);
if (s < 0)
goto fail_sock;
diff --git a/passt.1 b/passt.1
index 555a50c..7d113f2 100644
--- a/passt.1
+++ b/passt.1
@@ -325,7 +325,8 @@ For low (< 1024) ports, see \fBNOTES\fR.
.BR ports
A comma-separated list of ports, optionally ranged with \fI-\fR, and,
optionally, with target ports after \fI:\fR, if they differ. Specific addresses
-can be bound as well, separated by \fI/\fR. Within given ranges, selected ports
+can be bound as well, separated by \fI/\fR, and also, since Linux 5.7, limited
+to specific interfaces, prefixed by \fI%\fR. Within given ranges, selected ports
and ranges can be excluded by an additional specification prefixed by \fI~\fR.
Specifying excluded ranges only implies that all other ports are forwarded.
Examples:
@@ -349,6 +350,9 @@ Forward local ports 22 to 80 to corresponding ports on the guest plus 10
-t 192.0.2.1/22
Forward local port 22, bound to 192.0.2.1, to port 22 on the guest
.TP
+-t 192.0.2.1%eth0/22
+Forward local port 22, bound to 192.0.2.1 and interface eth0, to port 22
+.TP
-t 2000-5000,~3000-3010
Forward local ports 2000 to 5000, but not 3000 to 3010
.TP
@@ -399,7 +403,8 @@ periodically derived (every second) from listening sockets reported by
.BR ports
A comma-separated list of ports, optionally ranged with \fI-\fR, and,
optionally, with target ports after \fI:\fR, if they differ. Specific addresses
-can be bound as well, separated by \fI/\fR. Within given ranges, selected ports
+can be bound as well, separated by \fI/\fR, and also, since Linux 5.7, limited
+to specific interfaces, prefixed by \fI%\fR. Within given ranges, selected ports
and ranges can be excluded by an additional specification prefixed by \fI~\fR.
Specifying excluded ranges only implies that all other ports are forwarded.
Examples:
@@ -424,6 +429,9 @@ namespace
-t 192.0.2.1/22
Forward local port 22, bound to 192.0.2.1, to port 22 in the target namespace
.TP
+-t 192.0.2.1%eth0/22
+Forward local port 22, bound to 192.0.2.1 and interface eth0, to port 22
+.TP
-t 2000-5000,~3000-3010
Forward local ports 2000 to 5000, but not 3000 to 3010
.TP
diff --git a/tcp.c b/tcp.c
index 830dc88..7e82589 100644
--- a/tcp.c
+++ b/tcp.c
@@ -3078,10 +3078,11 @@ void tcp_sock_handler(struct ctx *c, union epoll_ref ref, uint32_t events,
* @ns: In pasta mode, if set, bind with loopback address in namespace
* @af: Address family to select a specific IP version, or AF_UNSPEC
* @addr: Pointer to address for binding, NULL if not configured
+ * @ifname: Name of interface to bind to, NULL if not configured
* @port: Port, host order
*/
void tcp_sock_init(const struct ctx *c, int ns, sa_family_t af,
- const void *addr, in_port_t port)
+ const void *addr, const char *ifname, in_port_t port)
{
union tcp_epoll_ref tref = { .tcp.listen = 1 };
const void *bind_addr;
@@ -3103,8 +3104,8 @@ void tcp_sock_init(const struct ctx *c, int ns, sa_family_t af,
tref.tcp.splice = 0;
if (!ns) {
- s = sock_l4(c, AF_INET, IPPROTO_TCP, bind_addr, port,
- tref.u32);
+ s = sock_l4(c, AF_INET, IPPROTO_TCP, bind_addr, ifname,
+ port, tref.u32);
if (s >= 0)
tcp_sock_set_bufsize(c, s);
else
@@ -3118,8 +3119,8 @@ void tcp_sock_init(const struct ctx *c, int ns, sa_family_t af,
bind_addr = &(uint32_t){ htonl(INADDR_LOOPBACK) };
tref.tcp.splice = 1;
- s = sock_l4(c, AF_INET, IPPROTO_TCP, bind_addr, port,
- tref.u32);
+ s = sock_l4(c, AF_INET, IPPROTO_TCP, bind_addr, ifname,
+ port, tref.u32);
if (s >= 0)
tcp_sock_set_bufsize(c, s);
else
@@ -3144,8 +3145,8 @@ void tcp_sock_init(const struct ctx *c, int ns, sa_family_t af,
tref.tcp.splice = 0;
if (!ns) {
- s = sock_l4(c, AF_INET6, IPPROTO_TCP, bind_addr, port,
- tref.u32);
+ s = sock_l4(c, AF_INET6, IPPROTO_TCP, bind_addr, ifname,
+ port, tref.u32);
if (s >= 0)
tcp_sock_set_bufsize(c, s);
else
@@ -3159,8 +3160,8 @@ void tcp_sock_init(const struct ctx *c, int ns, sa_family_t af,
bind_addr = &in6addr_loopback;
tref.tcp.splice = 1;
- s = sock_l4(c, AF_INET6, IPPROTO_TCP, bind_addr, port,
- tref.u32);
+ s = sock_l4(c, AF_INET6, IPPROTO_TCP, bind_addr, ifname,
+ port, tref.u32);
if (s >= 0)
tcp_sock_set_bufsize(c, s);
else
@@ -3193,7 +3194,7 @@ static int tcp_sock_init_ns(void *arg)
if (!bitmap_isset(c->tcp.fwd_out.map, port))
continue;
- tcp_sock_init(c, 1, AF_UNSPEC, NULL, port);
+ tcp_sock_init(c, 1, AF_UNSPEC, NULL, NULL, port);
}
return 0;
@@ -3410,7 +3411,8 @@ static int tcp_port_rebind(void *arg)
if ((a->c->ifi4 && tcp_sock_ns[port][V4] == -1) ||
(a->c->ifi6 && tcp_sock_ns[port][V6] == -1))
- tcp_sock_init(a->c, 1, AF_UNSPEC, NULL, port);
+ tcp_sock_init(a->c, 1, AF_UNSPEC, NULL, NULL,
+ port);
}
} else {
for (port = 0; port < NUM_PORTS; port++) {
@@ -3443,7 +3445,8 @@ static int tcp_port_rebind(void *arg)
if ((a->c->ifi4 && tcp_sock_init_ext[port][V4] == -1) ||
(a->c->ifi6 && tcp_sock_init_ext[port][V6] == -1))
- tcp_sock_init(a->c, 0, AF_UNSPEC, NULL, port);
+ tcp_sock_init(a->c, 0, AF_UNSPEC, NULL, NULL,
+ port);
}
}
diff --git a/tcp.h b/tcp.h
index 2548d4d..7ba7ab7 100644
--- a/tcp.h
+++ b/tcp.h
@@ -21,7 +21,7 @@ void tcp_sock_handler(struct ctx *c, union epoll_ref ref, uint32_t events,
int tcp_tap_handler(struct ctx *c, int af, const void *addr,
const struct pool *p, const struct timespec *now);
void tcp_sock_init(const struct ctx *c, int ns, sa_family_t af,
- const void *addr, in_port_t port);
+ const void *addr, const char *ifname, in_port_t port);
int tcp_init(struct ctx *c);
void tcp_timer(struct ctx *c, const struct timespec *ts);
void tcp_defer_handler(struct ctx *c);
diff --git a/udp.c b/udp.c
index 5422fdd..cac9c65 100644
--- a/udp.c
+++ b/udp.c
@@ -1005,7 +1005,7 @@ int udp_tap_handler(struct ctx *c, int af, const void *addr,
union udp_epoll_ref uref = { .udp.bound = 1,
.udp.port = src };
- s = sock_l4(c, AF_INET, IPPROTO_UDP, NULL, src,
+ s = sock_l4(c, AF_INET, IPPROTO_UDP, NULL, NULL, src,
uref.u32);
if (s < 0)
return p->count;
@@ -1057,8 +1057,8 @@ int udp_tap_handler(struct ctx *c, int af, const void *addr,
.udp.v6 = 1,
.udp.port = src };
- s = sock_l4(c, AF_INET6, IPPROTO_UDP, bind_addr, src,
- uref.u32);
+ s = sock_l4(c, AF_INET6, IPPROTO_UDP, bind_addr, NULL,
+ src, uref.u32);
if (s < 0)
return p->count;
@@ -1111,10 +1111,11 @@ int udp_tap_handler(struct ctx *c, int af, const void *addr,
* @ns: In pasta mode, if set, bind with loopback address in namespace
* @af: Address family to select a specific IP version, or AF_UNSPEC
* @addr: Pointer to address for binding, NULL if not configured
+ * @ifname: Name of interface to bind to, NULL if not configured
* @port: Port, host order
*/
void udp_sock_init(const struct ctx *c, int ns, sa_family_t af,
- const void *addr, in_port_t port)
+ const void *addr, const char *ifname, in_port_t port)
{
union udp_epoll_ref uref = { .udp.bound = 1 };
const void *bind_addr;
@@ -1138,8 +1139,8 @@ void udp_sock_init(const struct ctx *c, int ns, sa_family_t af,
if (!ns) {
uref.udp.splice = 0;
- s = sock_l4(c, AF_INET, IPPROTO_UDP, bind_addr, port,
- uref.u32);
+ s = sock_l4(c, AF_INET, IPPROTO_UDP, bind_addr, ifname,
+ port, uref.u32);
udp_tap_map[V4][uref.udp.port].sock = s;
}
@@ -1148,16 +1149,16 @@ void udp_sock_init(const struct ctx *c, int ns, sa_family_t af,
bind_addr = &(uint32_t){ htonl(INADDR_LOOPBACK) };
uref.udp.splice = UDP_TO_NS;
- sock_l4(c, AF_INET, IPPROTO_UDP, bind_addr, port,
- uref.u32);
+ sock_l4(c, AF_INET, IPPROTO_UDP, bind_addr, ifname,
+ port, uref.u32);
}
if (ns) {
bind_addr = &(uint32_t){ htonl(INADDR_LOOPBACK) };
uref.udp.splice = UDP_TO_INIT;
- sock_l4(c, AF_INET, IPPROTO_UDP, bind_addr, port,
- uref.u32);
+ sock_l4(c, AF_INET, IPPROTO_UDP, bind_addr, ifname,
+ port, uref.u32);
}
}
@@ -1171,8 +1172,8 @@ void udp_sock_init(const struct ctx *c, int ns, sa_family_t af,
if (!ns) {
uref.udp.splice = 0;
- s = sock_l4(c, AF_INET6, IPPROTO_UDP, bind_addr, port,
- uref.u32);
+ s = sock_l4(c, AF_INET6, IPPROTO_UDP, bind_addr, ifname,
+ port, uref.u32);
udp_tap_map[V6][uref.udp.port].sock = s;
}
@@ -1181,16 +1182,16 @@ void udp_sock_init(const struct ctx *c, int ns, sa_family_t af,
bind_addr = &in6addr_loopback;
uref.udp.splice = UDP_TO_NS;
- sock_l4(c, AF_INET6, IPPROTO_UDP, bind_addr, port,
- uref.u32);
+ sock_l4(c, AF_INET6, IPPROTO_UDP, bind_addr, ifname,
+ port, uref.u32);
}
if (ns) {
bind_addr = &in6addr_loopback;
uref.udp.splice = UDP_TO_INIT;
- sock_l4(c, AF_INET6, IPPROTO_UDP, bind_addr, port,
- uref.u32);
+ sock_l4(c, AF_INET6, IPPROTO_UDP, bind_addr, ifname,
+ port, uref.u32);
}
}
}
@@ -1213,7 +1214,7 @@ int udp_sock_init_ns(void *arg)
if (!bitmap_isset(c->udp.fwd_out.f.map, dst))
continue;
- udp_sock_init(c, 1, AF_UNSPEC, NULL, dst);
+ udp_sock_init(c, 1, AF_UNSPEC, NULL, NULL, dst);
}
return 0;
diff --git a/udp.h b/udp.h
index d14df0a..b4ee8b7 100644
--- a/udp.h
+++ b/udp.h
@@ -13,7 +13,7 @@ void udp_sock_handler(const struct ctx *c, union epoll_ref ref, uint32_t events,
int udp_tap_handler(struct ctx *c, int af, const void *addr,
const struct pool *p, const struct timespec *now);
void udp_sock_init(const struct ctx *c, int ns, sa_family_t af,
- const void *addr, in_port_t port);
+ const void *addr, const char *ifname, in_port_t port);
int udp_init(struct ctx *c);
void udp_timer(struct ctx *c, const struct timespec *ts);
void udp_update_l2_buf(const unsigned char *eth_d, const unsigned char *eth_s,
diff --git a/util.c b/util.c
index 5b1e08a..76057be 100644
--- a/util.c
+++ b/util.c
@@ -90,13 +90,15 @@ found:
* @af: Address family, AF_INET or AF_INET6
* @proto: Protocol number
* @bind_addr: Address for binding, NULL for any
+ * @ifname: Interface for binding, NULL for any
* @port: Port, host order
* @data: epoll reference portion for protocol handlers
*
* Return: newly created socket, -1 on error
*/
int sock_l4(const struct ctx *c, int af, uint8_t proto,
- const void *bind_addr, uint16_t port, uint32_t data)
+ const void *bind_addr, const char *ifname, uint16_t port,
+ uint32_t data)
{
union epoll_ref ref = { .r.proto = proto, .r.p.data = data };
struct sockaddr_in addr4 = {
@@ -163,6 +165,21 @@ int sock_l4(const struct ctx *c, int af, uint8_t proto,
if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &y, sizeof(y)))
debug("Failed to set SO_REUSEADDR on socket %i", fd);
+ if (ifname) {
+ /* Supported since kernel version 5.7, commit c427bfec18f2
+ * ("net: core: enable SO_BINDTODEVICE for non-root users"). If
+ * it's unsupported, don't bind the socket at all, because the
+ * user might rely on this to filter incoming connections.
+ */
+ if (setsockopt(fd, SOL_SOCKET, SO_BINDTODEVICE,
+ ifname, strlen(ifname))) {
+ warn("Can't bind socket for %s port %u to %s, closing",
+ ip_proto_str[proto], port, ifname);
+ close(fd);
+ return -1;
+ }
+ }
+
if (bind(fd, sa, sl) < 0) {
/* We'll fail to bind to low ports if we don't have enough
* capabilities, and we'll fail to bind on already bound ports,
diff --git a/util.h b/util.h
index f9a8ec6..7dc3d18 100644
--- a/util.h
+++ b/util.h
@@ -194,7 +194,8 @@ __attribute__ ((weak)) int ffsl(long int i) { return __builtin_ffsl(i); }
char *ipv6_l4hdr(const struct pool *p, int index, size_t offset, uint8_t *proto,
size_t *dlen);
int sock_l4(const struct ctx *c, int af, uint8_t proto,
- const void *bind_addr, uint16_t port, uint32_t data);
+ const void *bind_addr, const char *ifname, uint16_t port,
+ uint32_t data);
void sock_probe_mem(struct ctx *c);
int timespec_diff_ms(const struct timespec *a, const struct timespec *b);
void bitmap_set(uint8_t *map, int bit);
--
2.35.1
next reply other threads:[~2022-10-10 23:32 UTC|newest]
Thread overview: 3+ messages / expand[flat|nested] mbox.gz Atom feed top
2022-10-10 23:32 Stefano Brivio [this message]
2022-10-11 0:48 ` [PATCH v3] conf, tcp, udp: Allow specification of interface to bind to David Gibson
2022-10-11 11:59 ` Stefano Brivio
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20221010233220.1198263-1-sbrivio@redhat.com \
--to=sbrivio@redhat.com \
--cc=passt-dev@passt.top \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
Code repositories for project(s) associated with this public inbox
https://passt.top/passt
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for IMAP folder(s).