From: David Gibson <david@gibson.dropbear.id.au>
To: Stefano Brivio <sbrivio@redhat.com>, passt-dev@passt.top
Cc: David Gibson <david@gibson.dropbear.id.au>
Subject: [PATCH 2/4] util, pif: Replace sock_l4() with pif_sock_l4()
Date: Fri, 20 Sep 2024 14:12:42 +1000 [thread overview]
Message-ID: <20240920041244.593192-3-david@gibson.dropbear.id.au> (raw)
In-Reply-To: <20240920041244.593192-1-david@gibson.dropbear.id.au>
The sock_l4() function is very convenient for creating sockets bound to
a given address, but its interface has some problems.
Most importantly, the address and port alone aren't enough in some cases.
For link-local addresses (at least) we also need the pif in order to
properly construct a socket adddress. This case doesn't yet arise, but
it might cause us trouble in future.
Additionally, sock_l4() can take AF_UNSPEC with the special meaning that it
should attempt to create a "dual stack" socket which will respond to both
IPv4 and IPv6 traffic. This only makes sense if there is no specific
address given. We verify this at runtime, but it would be nicer if we
could enforce it structurally.
For sockets associated specifically with a single flow we already replaced
sock_l4() with flowside_sock_l4() which avoids those problems. Now,
replace all the remaining users with a new pif_sock_l4() which also takes
an explicit pif.
The new function takes the address as an inany *, with NULL indicating the
dual stack case. This does add some complexity in some of the callers,
however future planned cleanups should make this go away again.
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
---
pif.c | 42 ++++++++++++++++++++++++++++++++++++++++++
pif.h | 3 +++
tcp.c | 22 +++++++++++++++++-----
udp.c | 34 ++++++++++++++++++++++------------
util.c | 52 ----------------------------------------------------
util.h | 3 ---
6 files changed, 84 insertions(+), 72 deletions(-)
diff --git a/pif.c b/pif.c
index a099e31b..592fafaa 100644
--- a/pif.c
+++ b/pif.c
@@ -59,3 +59,45 @@ void pif_sockaddr(const struct ctx *c, union sockaddr_inany *sa, socklen_t *sl,
*sl = sizeof(sa->sa6);
}
}
+
+/** pif_sock_l4() - Open a socket bound to an address on a specified interface
+ * @c: Execution context
+ * @type: Socket epoll type
+ * @pif: Interface for this socket
+ * @addr: Address to bind to, or NULL for dual-stack any
+ * @ifname: Interface for binding, NULL for any
+ * @port: Port number to bind to (host byte order)
+ * @data: epoll reference portion for protocol handlers
+ *
+ * NOTE: For namespace pifs, this must be called having already entered the
+ * relevant namespace.
+ *
+ * Return: newly created socket, negative error code on failure
+ */
+int pif_sock_l4(const struct ctx *c, enum epoll_type type, uint8_t pif,
+ const union inany_addr *addr, const char *ifname,
+ in_port_t port, uint32_t data)
+{
+ union sockaddr_inany sa = {
+ .sa6.sin6_family = AF_INET6,
+ .sa6.sin6_addr = in6addr_any,
+ .sa6.sin6_port = htons(port),
+ };
+ socklen_t sl;
+
+ ASSERT(pif_is_socket(pif));
+
+ if (pif == PIF_SPLICE) {
+ /* Sanity checks */
+ ASSERT(!ifname);
+ ASSERT(addr && inany_is_loopback(addr));
+ }
+
+ if (!addr)
+ return sock_l4_sa(c, type, &sa, sizeof(sa.sa6),
+ ifname, false, data);
+
+ pif_sockaddr(c, &sa, &sl, pif, addr, port);
+ return sock_l4_sa(c, type, &sa, sl,
+ ifname, sa.sa_family == AF_INET6, data);
+}
diff --git a/pif.h b/pif.h
index 8777bb5b..f029282b 100644
--- a/pif.h
+++ b/pif.h
@@ -59,5 +59,8 @@ static inline bool pif_is_socket(uint8_t pif)
void pif_sockaddr(const struct ctx *c, union sockaddr_inany *sa, socklen_t *sl,
uint8_t pif, const union inany_addr *addr, in_port_t port);
+int pif_sock_l4(const struct ctx *c, enum epoll_type type, uint8_t pif,
+ const union inany_addr *addr, const char *ifname,
+ in_port_t port, uint32_t data);
#endif /* PIF_H */
diff --git a/tcp.c b/tcp.c
index 1962fcc4..49e0cfe3 100644
--- a/tcp.c
+++ b/tcp.c
@@ -2291,7 +2291,19 @@ static int tcp_sock_init_af(const struct ctx *c, sa_family_t af, in_port_t port,
};
int s;
- s = sock_l4(c, af, EPOLL_TYPE_TCP_LISTEN, addr, ifname, port, tref.u32);
+ if (af == AF_UNSPEC) {
+ ASSERT(!addr);
+ s = pif_sock_l4(c, EPOLL_TYPE_TCP_LISTEN, PIF_HOST, NULL,
+ ifname, port, tref.u32);
+ } else {
+ union inany_addr aany = af == AF_INET ? inany_any4 : inany_any6;
+
+ if (addr)
+ inany_from_af(&aany, af, addr);
+
+ s = pif_sock_l4(c, EPOLL_TYPE_TCP_LISTEN, PIF_HOST, &aany,
+ ifname, port, tref.u32);
+ }
if (c->tcp.fwd_in.mode == FWD_AUTO) {
if (af == AF_INET || af == AF_UNSPEC)
@@ -2357,8 +2369,8 @@ static void tcp_ns_sock_init4(const struct ctx *c, in_port_t port)
ASSERT(c->mode == MODE_PASTA);
- s = sock_l4(c, AF_INET, EPOLL_TYPE_TCP_LISTEN, &in4addr_loopback,
- NULL, port, tref.u32);
+ s = pif_sock_l4(c, EPOLL_TYPE_TCP_LISTEN, PIF_SPLICE, &inany_loopback4,
+ NULL, port, tref.u32);
if (s >= 0)
tcp_sock_set_bufsize(c, s);
else
@@ -2383,8 +2395,8 @@ static void tcp_ns_sock_init6(const struct ctx *c, in_port_t port)
ASSERT(c->mode == MODE_PASTA);
- s = sock_l4(c, AF_INET6, EPOLL_TYPE_TCP_LISTEN, &in6addr_loopback,
- NULL, port, tref.u32);
+ s = pif_sock_l4(c, EPOLL_TYPE_TCP_LISTEN, PIF_SPLICE, &inany_loopback6,
+ NULL, port, tref.u32);
if (s >= 0)
tcp_sock_set_bufsize(c, s);
else
diff --git a/udp.c b/udp.c
index 8cea80cd..b3d4a64c 100644
--- a/udp.c
+++ b/udp.c
@@ -809,8 +809,8 @@ int udp_sock_init(const struct ctx *c, int ns, sa_family_t af,
ASSERT(!addr);
/* Attempt to get a dual stack socket */
- s = sock_l4(c, AF_UNSPEC, EPOLL_TYPE_UDP_LISTEN,
- NULL, ifname, port, uref.u32);
+ s = pif_sock_l4(c, EPOLL_TYPE_UDP_LISTEN, PIF_HOST,
+ NULL, ifname, port, uref.u32);
udp_splice_init[V4][port] = s < 0 ? -1 : s;
udp_splice_init[V6][port] = s < 0 ? -1 : s;
if (IN_INTERVAL(0, FD_REF_MAX, s))
@@ -819,28 +819,38 @@ int udp_sock_init(const struct ctx *c, int ns, sa_family_t af,
if ((af == AF_INET || af == AF_UNSPEC) && c->ifi4) {
if (!ns) {
- r4 = sock_l4(c, AF_INET, EPOLL_TYPE_UDP_LISTEN,
- addr, ifname, port, uref.u32);
+ union inany_addr aany = inany_any4;
+
+ if (addr)
+ inany_from_af(&aany, AF_INET, addr);
+
+ r4 = pif_sock_l4(c, EPOLL_TYPE_UDP_LISTEN, PIF_HOST,
+ &aany, ifname, port, uref.u32);
udp_splice_init[V4][port] = r4 < 0 ? -1 : r4;
} else {
- r4 = sock_l4(c, AF_INET, EPOLL_TYPE_UDP_LISTEN,
- &in4addr_loopback,
- ifname, port, uref.u32);
+ r4 = pif_sock_l4(c, EPOLL_TYPE_UDP_LISTEN, PIF_SPLICE,
+ &inany_loopback4, ifname,
+ port, uref.u32);
udp_splice_ns[V4][port] = r4 < 0 ? -1 : r4;
}
}
if ((af == AF_INET6 || af == AF_UNSPEC) && c->ifi6) {
if (!ns) {
- r6 = sock_l4(c, AF_INET6, EPOLL_TYPE_UDP_LISTEN,
- addr, ifname, port, uref.u32);
+ union inany_addr aany = inany_any6;
+
+ if (addr)
+ inany_from_af(&aany, AF_INET6, addr);
+
+ r6 = pif_sock_l4(c, EPOLL_TYPE_UDP_LISTEN, PIF_HOST,
+ &aany, ifname, port, uref.u32);
udp_splice_init[V6][port] = r6 < 0 ? -1 : r6;
} else {
- r6 = sock_l4(c, AF_INET6, EPOLL_TYPE_UDP_LISTEN,
- &in6addr_loopback,
- ifname, port, uref.u32);
+ r6 = pif_sock_l4(c, EPOLL_TYPE_UDP_LISTEN, PIF_SPLICE,
+ &inany_loopback6, ifname,
+ port, uref.u32);
udp_splice_ns[V6][port] = r6 < 0 ? -1 : r6;
}
}
diff --git a/util.c b/util.c
index 87309c51..ebd93ed2 100644
--- a/util.c
+++ b/util.c
@@ -157,58 +157,6 @@ int sock_l4_sa(const struct ctx *c, enum epoll_type type,
return fd;
}
-/**
- * sock_l4() - Create and bind socket for given L4, add to epoll list
- * @c: Execution context
- * @af: Address family, AF_INET or AF_INET6
- * @type: epoll type
- * @bind_addr: Address for binding, NULL for any
- * @ifname: Interface for binding, NULL for any
- * @port: Port, host order
- * @data: epoll reference portion for protocol handlers
- *
- * Return: newly created socket, negative error code on failure
- */
-int sock_l4(const struct ctx *c, sa_family_t af, enum epoll_type type,
- const void *bind_addr, const char *ifname, uint16_t port,
- uint32_t data)
-{
- switch (af) {
- case AF_INET: {
- struct sockaddr_in addr4 = {
- .sin_family = AF_INET,
- .sin_port = htons(port),
- { 0 }, { 0 },
- };
- if (bind_addr)
- addr4.sin_addr = *(struct in_addr *)bind_addr;
- return sock_l4_sa(c, type, &addr4, sizeof(addr4), ifname,
- false, data);
- }
-
- case AF_UNSPEC:
- if (!DUAL_STACK_SOCKETS || bind_addr)
- return -EINVAL;
- /* fallthrough */
- case AF_INET6: {
- struct sockaddr_in6 addr6 = {
- .sin6_family = AF_INET6,
- .sin6_port = htons(port),
- 0, IN6ADDR_ANY_INIT, 0,
- };
- if (bind_addr) {
- addr6.sin6_addr = *(struct in6_addr *)bind_addr;
-
- if (IN6_IS_ADDR_LINKLOCAL(bind_addr))
- addr6.sin6_scope_id = c->ifi6;
- }
- return sock_l4_sa(c, type, &addr6, sizeof(addr6), ifname,
- af == AF_INET6, data);
- }
- default:
- return -EINVAL;
- }
-}
/**
* sock_probe_mem() - Check if setting high SO_SNDBUF and SO_RCVBUF is allowed
diff --git a/util.h b/util.h
index 5e67f1fa..2c1e08e0 100644
--- a/util.h
+++ b/util.h
@@ -181,9 +181,6 @@ int close_range(unsigned int first, unsigned int last, int flags) {
int sock_l4_sa(const struct ctx *c, enum epoll_type type,
const void *sa, socklen_t sl,
const char *ifname, bool v6only, uint32_t data);
-int sock_l4(const struct ctx *c, sa_family_t af, enum epoll_type type,
- const void *bind_addr, const char *ifname, uint16_t port,
- uint32_t data);
void sock_probe_mem(struct ctx *c);
long timespec_diff_ms(const struct timespec *a, const struct timespec *b);
int64_t timespec_diff_us(const struct timespec *a, const struct timespec *b);
--
@@ -181,9 +181,6 @@ int close_range(unsigned int first, unsigned int last, int flags) {
int sock_l4_sa(const struct ctx *c, enum epoll_type type,
const void *sa, socklen_t sl,
const char *ifname, bool v6only, uint32_t data);
-int sock_l4(const struct ctx *c, sa_family_t af, enum epoll_type type,
- const void *bind_addr, const char *ifname, uint16_t port,
- uint32_t data);
void sock_probe_mem(struct ctx *c);
long timespec_diff_ms(const struct timespec *a, const struct timespec *b);
int64_t timespec_diff_us(const struct timespec *a, const struct timespec *b);
--
2.46.1
next prev parent reply other threads:[~2024-09-20 4:12 UTC|newest]
Thread overview: 6+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-09-20 4:12 [PATCH 0/4] Use inany addresses through port forwarding configuration David Gibson
2024-09-20 4:12 ` [PATCH 1/4] udp: Don't attempt to get dual-stack sockets in nonsensical cases David Gibson
2024-09-20 4:12 ` David Gibson [this message]
2024-09-20 4:12 ` [PATCH 3/4] tcp, udp: Make {tcp,udp}_sock_init() take an inany address David Gibson
2024-09-20 4:12 ` [PATCH 4/4] inany: Add inany_pton() helper David Gibson
2024-09-25 17:39 ` [PATCH 0/4] Use inany addresses through port forwarding configuration Stefano Brivio
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20240920041244.593192-3-david@gibson.dropbear.id.au \
--to=david@gibson.dropbear.id.au \
--cc=passt-dev@passt.top \
--cc=sbrivio@redhat.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
Code repositories for project(s) associated with this public inbox
https://passt.top/passt
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for IMAP folder(s).