public inbox for passt-dev@passt.top
 help / color / mirror / code / Atom feed
From: David Gibson <david@gibson.dropbear.id.au>
To: Stefano Brivio <sbrivio@redhat.com>, passt-dev@passt.top
Cc: David Gibson <david@gibson.dropbear.id.au>
Subject: [PATCH v4 2/9] inany: Let length of sockaddr_inany be implicit from the family
Date: Wed, 19 Nov 2025 16:22:50 +1100	[thread overview]
Message-ID: <20251119052257.3004500-3-david@gibson.dropbear.id.au> (raw)
In-Reply-To: <20251119052257.3004500-1-david@gibson.dropbear.id.au>

sockaddr_inany can contain either an IPv4 or IPv6 socket address, so the
relevant length for bind() or connect() can vary.  In pif_sockaddr() we
return that length, and in sock_l4_sa() we take it as an extra parameter.

However, sockaddr_inany always contains exactly a sockaddr_in or a
sockaddr_in6 each with a fixed size.  Therefore we can derive the relevant
length from the family, and don't need to pass it around separately.

Make a tiny helper to get the relevant address length, and update all
interfaces to use that approach instead.

In the process, fix a buglet in tcp_flow_repair_bind(): we passed
sizeof(union sockaddr_inany) to bind() instead of the specific length for
the address family.  Since the sizeof() is always longer than the specific
length, this is probably fine, but not theoretically correct.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
---
 flow.c       | 21 ++++++++-------------
 icmp.c       |  3 ++-
 inany.h      | 17 +++++++++++++++++
 pif.c        | 15 +++++----------
 pif.h        |  2 +-
 tcp.c        | 20 +++++++++-----------
 tcp_splice.c |  5 ++---
 udp.c        |  8 +++-----
 util.c       |  8 +++-----
 util.h       |  4 ++--
 10 files changed, 52 insertions(+), 51 deletions(-)

diff --git a/flow.c b/flow.c
index 743860d6..11e3f283 100644
--- a/flow.c
+++ b/flow.c
@@ -163,15 +163,13 @@ static void flowside_from_af(struct flowside *side, sa_family_t af,
  * @err:	Filled in with errno if something failed
  * @type:	Socket epoll type
  * @sa:		Socket address
- * @sl:		Length of @sa
  */
 struct flowside_sock_args {
 	const struct ctx *c;
 	int fd;
 	int err;
 	enum epoll_type type;
-	const struct sockaddr *sa;
-	socklen_t sl;
+	const union sockaddr_inany *sa;
 };
 
 /** flowside_sock_splice() - Create and bind socket for PIF_SPLICE based on flowside
@@ -185,8 +183,8 @@ static int flowside_sock_splice(void *arg)
 
 	ns_enter(a->c);
 
-	a->fd = sock_l4_sa(a->c, a->type, a->sa, a->sl, NULL,
-	                   a->sa->sa_family == AF_INET6);
+	a->fd = sock_l4_sa(a->c, a->type, a->sa, NULL,
+			   a->sa->sa_family == AF_INET6);
 	a->err = errno;
 
 	return 0;
@@ -207,11 +205,10 @@ int flowside_sock_l4(const struct ctx *c, enum epoll_type type, uint8_t pif,
 {
 	const char *ifname = NULL;
 	union sockaddr_inany sa;
-	socklen_t sl;
 
 	ASSERT(pif_is_socket(pif));
 
-	pif_sockaddr(c, &sa, &sl, pif, &tgt->oaddr, tgt->oport);
+	pif_sockaddr(c, &sa, pif, &tgt->oaddr, tgt->oport);
 
 	switch (pif) {
 	case PIF_HOST:
@@ -222,13 +219,12 @@ int flowside_sock_l4(const struct ctx *c, enum epoll_type type, uint8_t pif,
 		else if (sa.sa_family == AF_INET6)
 			ifname = c->ip6.ifname_out;
 
-		return sock_l4_sa(c, type, &sa, sl, ifname,
+		return sock_l4_sa(c, type, &sa, ifname,
 				  sa.sa_family == AF_INET6);
 
 	case PIF_SPLICE: {
 		struct flowside_sock_args args = {
-			.c = c, .type = type,
-			.sa = &sa.sa, .sl = sl,
+			.c = c, .type = type, .sa = &sa,
 		};
 		NS_CALL(flowside_sock_splice, &args);
 		errno = args.err;
@@ -257,10 +253,9 @@ int flowside_connect(const struct ctx *c, int s,
 		     uint8_t pif, const struct flowside *tgt)
 {
 	union sockaddr_inany sa;
-	socklen_t sl;
 
-	pif_sockaddr(c, &sa, &sl, pif, &tgt->eaddr, tgt->eport);
-	return connect(s, &sa.sa, sl);
+	pif_sockaddr(c, &sa, pif, &tgt->eaddr, tgt->eport);
+	return connect(s, &sa.sa, socklen_inany(&sa));
 }
 
 /** flow_log_ - Log flow-related message
diff --git a/icmp.c b/icmp.c
index 35faefb9..9564c496 100644
--- a/icmp.c
+++ b/icmp.c
@@ -312,8 +312,9 @@ int icmp_tap_handler(const struct ctx *c, uint8_t pif, sa_family_t af,
 	ASSERT(flow_proto[pingf->f.type] == proto);
 	pingf->ts = now->tv_sec;
 
-	pif_sockaddr(c, &sa, &msh.msg_namelen, PIF_HOST, &tgt->eaddr, 0);
+	pif_sockaddr(c, &sa, PIF_HOST, &tgt->eaddr, 0);
 	msh.msg_name = &sa;
+	msh.msg_namelen = socklen_inany(&sa);
 	msh.msg_iov = iov;
 	msh.msg_iovlen = cnt;
 	msh.msg_control = NULL;
diff --git a/inany.h b/inany.h
index 7ca5cbd3..61b36fb4 100644
--- a/inany.h
+++ b/inany.h
@@ -67,6 +67,23 @@ union sockaddr_inany {
 	struct sockaddr_in6	sa6;
 };
 
+/** socklen_inany() - Get relevant address length for sockaddr_inany address
+ * @sa:		sockaddr_inany socket address
+ *
+ * Return: socket address length for bind() or connect(), from IP family in @sa
+ */
+static inline socklen_t socklen_inany(const union sockaddr_inany *sa)
+{
+	switch (sa->sa_family) {
+	case AF_INET:
+		return sizeof(sa->sa4);
+	case AF_INET6:
+		return sizeof(sa->sa6);
+	default:
+		ASSERT(0);
+	}
+}
+
 /** inany_v4 - Extract IPv4 address, if present, from IPv[46] address
  * @addr:	IPv4 or IPv6 address
  *
diff --git a/pif.c b/pif.c
index 331942e7..e6a50806 100644
--- a/pif.c
+++ b/pif.c
@@ -30,12 +30,11 @@ static_assert(ARRAY_SIZE(pif_type_str) == PIF_NUM_TYPES,
 /** pif_sockaddr() - Construct a socket address suitable for an interface
  * @c:		Execution context
  * @sa:		Pointer to sockaddr to fill in
- * @sl:		Updated to relevant length of initialised @sa
  * @pif:	Interface to create the socket address
  * @addr:	IPv[46] address
  * @port:	Port (host byte order)
  */
-void pif_sockaddr(const struct ctx *c, union sockaddr_inany *sa, socklen_t *sl,
+void pif_sockaddr(const struct ctx *c, union sockaddr_inany *sa,
 		  uint8_t pif, const union inany_addr *addr, in_port_t port)
 {
 	const struct in_addr *v4 = inany_v4(addr);
@@ -47,7 +46,6 @@ void pif_sockaddr(const struct ctx *c, union sockaddr_inany *sa, socklen_t *sl,
 		sa->sa4.sin_addr = *v4;
 		sa->sa4.sin_port = htons(port);
 		memset(&sa->sa4.sin_zero, 0, sizeof(sa->sa4.sin_zero));
-		*sl = sizeof(sa->sa4);
 	} else {
 		sa->sa_family = AF_INET6;
 		sa->sa6.sin6_addr = addr->a6;
@@ -57,7 +55,6 @@ void pif_sockaddr(const struct ctx *c, union sockaddr_inany *sa, socklen_t *sl,
 		else
 			sa->sa6.sin6_scope_id = 0;
 		sa->sa6.sin6_flowinfo = 0;
-		*sl = sizeof(sa->sa6);
 	}
 }
 
@@ -85,7 +82,6 @@ int pif_sock_l4(const struct ctx *c, enum epoll_type type, uint8_t pif,
 		.sa6.sin6_port = htons(port),
 	};
 	union epoll_ref ref;
-	socklen_t sl;
 	int ret;
 
 	ASSERT(pif_is_socket(pif));
@@ -97,12 +93,11 @@ int pif_sock_l4(const struct ctx *c, enum epoll_type type, uint8_t pif,
 	}
 
 	if (!addr) {
-		ref.fd = sock_l4_sa(c, type, &sa, sizeof(sa.sa6),
-				    ifname, false);
+		ref.fd = sock_l4_sa(c, type, &sa, ifname, false);
 	} else {
-		pif_sockaddr(c, &sa, &sl, pif, addr, port);
-		ref.fd = sock_l4_sa(c, type, &sa, sl,
-				    ifname, sa.sa_family == AF_INET6);
+		pif_sockaddr(c, &sa, pif, addr, port);
+		ref.fd = sock_l4_sa(c, type, &sa, ifname,
+				    sa.sa_family == AF_INET6);
 	}
 
 	if (ref.fd < 0)
diff --git a/pif.h b/pif.h
index f029282b..0f7f6672 100644
--- a/pif.h
+++ b/pif.h
@@ -57,7 +57,7 @@ static inline bool pif_is_socket(uint8_t pif)
 	return pif == PIF_HOST || pif == PIF_SPLICE;
 }
 
-void pif_sockaddr(const struct ctx *c, union sockaddr_inany *sa, socklen_t *sl,
+void pif_sockaddr(const struct ctx *c, union sockaddr_inany *sa,
 		  uint8_t pif, const union inany_addr *addr, in_port_t port);
 int pif_sock_l4(const struct ctx *c, enum epoll_type type, uint8_t pif,
 		const union inany_addr *addr, const char *ifname,
diff --git a/tcp.c b/tcp.c
index 3202d338..b8a98523 100644
--- a/tcp.c
+++ b/tcp.c
@@ -1457,12 +1457,11 @@ static void tcp_bind_outbound(const struct ctx *c,
 {
 	const struct flowside *tgt = &conn->f.side[TGTSIDE];
 	union sockaddr_inany bind_sa;
-	socklen_t sl;
 
 
-	pif_sockaddr(c, &bind_sa, &sl, PIF_HOST, &tgt->oaddr, tgt->oport);
+	pif_sockaddr(c, &bind_sa, PIF_HOST, &tgt->oaddr, tgt->oport);
 	if (!inany_is_unspecified(&tgt->oaddr) || tgt->oport) {
-		if (bind(s, &bind_sa.sa, sl)) {
+		if (bind(s, &bind_sa.sa, socklen_inany(&bind_sa))) {
 			char sstr[INANY_ADDRSTRLEN];
 
 			flow_dbg_perror(conn,
@@ -1522,7 +1521,6 @@ static void tcp_conn_from_tap(const struct ctx *c, sa_family_t af,
 	union flow *flow;
 	int s = -1, mss;
 	uint64_t hash;
-	socklen_t sl;
 
 	if (!(flow = flow_alloc()))
 		return;
@@ -1555,7 +1553,7 @@ static void tcp_conn_from_tap(const struct ctx *c, sa_family_t af,
 	if ((s = tcp_conn_sock(af)) < 0)
 		goto cancel;
 
-	pif_sockaddr(c, &sa, &sl, PIF_HOST, &tgt->eaddr, tgt->eport);
+	pif_sockaddr(c, &sa, PIF_HOST, &tgt->eaddr, tgt->eport);
 
 	/* Use bind() to check if the target address is local (EADDRINUSE or
 	 * similar) and already bound, and set the LOCAL flag in that case.
@@ -1567,7 +1565,7 @@ static void tcp_conn_from_tap(const struct ctx *c, sa_family_t af,
 	 *
 	 * So, if bind() succeeds, close the socket, get a new one, and proceed.
 	 */
-	if (bind(s, &sa.sa, sl)) {
+	if (bind(s, &sa.sa, socklen_inany(&sa))) {
 		if (errno != EADDRNOTAVAIL && errno != EACCES)
 			conn_flag(c, conn, LOCAL);
 	} else {
@@ -1607,7 +1605,7 @@ static void tcp_conn_from_tap(const struct ctx *c, sa_family_t af,
 
 	tcp_bind_outbound(c, conn, s);
 
-	if (connect(s, &sa.sa, sl)) {
+	if (connect(s, &sa.sa, socklen_inany(&sa))) {
 		if (errno != EINPROGRESS) {
 			tcp_rst(c, conn);
 			goto cancel;
@@ -1626,7 +1624,8 @@ static void tcp_conn_from_tap(const struct ctx *c, sa_family_t af,
 	tcp_epoll_ctl(c, conn);
 
 	if (c->mode == MODE_VU) { /* To rebind to same oport after migration */
-		sl = sizeof(sa);
+		socklen_t sl = sizeof(sa);
+
 		if (getsockname(s, &sa.sa, &sl) ||
 		    inany_from_sockaddr(&tgt->oaddr, &tgt->oport, &sa) < 0)
 			err_perror("Can't get local address for socket %i", s);
@@ -3611,11 +3610,10 @@ static int tcp_flow_repair_bind(const struct ctx *c, struct tcp_tap_conn *conn)
 {
 	const struct flowside *sockside = HOSTFLOW(conn);
 	union sockaddr_inany a;
-	socklen_t sl;
 
-	pif_sockaddr(c, &a, &sl, PIF_HOST, &sockside->oaddr, sockside->oport);
+	pif_sockaddr(c, &a, PIF_HOST, &sockside->oaddr, sockside->oport);
 
-	if (bind(conn->sock, &a.sa, sizeof(a))) {
+	if (bind(conn->sock, &a.sa, socklen_inany(&a))) {
 		int rc = -errno;
 		flow_perror(conn, "Failed to bind socket for migrated flow");
 		return rc;
diff --git a/tcp_splice.c b/tcp_splice.c
index 5efd8597..717766af 100644
--- a/tcp_splice.c
+++ b/tcp_splice.c
@@ -352,7 +352,6 @@ static int tcp_splice_connect(const struct ctx *c, struct tcp_splice_conn *conn)
 	sa_family_t af = inany_v4(&tgt->eaddr) ? AF_INET : AF_INET6;
 	uint8_t tgtpif = conn->f.pif[TGTSIDE];
 	union sockaddr_inany sa;
-	socklen_t sl;
 	int one = 1;
 
 	if (tgtpif == PIF_HOST)
@@ -380,9 +379,9 @@ static int tcp_splice_connect(const struct ctx *c, struct tcp_splice_conn *conn)
 			   conn->s[1]);
 	}
 
-	pif_sockaddr(c, &sa, &sl, tgtpif, &tgt->eaddr, tgt->eport);
+	pif_sockaddr(c, &sa, tgtpif, &tgt->eaddr, tgt->eport);
 
-	if (connect(conn->s[1], &sa.sa, sl)) {
+	if (connect(conn->s[1], &sa.sa, socklen_inany(&sa))) {
 		if (errno != EINPROGRESS) {
 			flow_trace(conn, "Couldn't connect socket for splice: %s",
 				   strerror_(errno));
diff --git a/udp.c b/udp.c
index 9c009502..d414ca32 100644
--- a/udp.c
+++ b/udp.c
@@ -780,7 +780,6 @@ static void udp_sock_to_sock(const struct ctx *c, int from_s, int n,
 	const struct udp_flow *uflow = udp_at_sidx(tosidx);
 	uint8_t topif = pif_at_sidx(tosidx);
 	int to_s = uflow->s[tosidx.sidei];
-	socklen_t sl;
 	int i;
 
 	if ((n = udp_sock_recv(c, from_s, udp_mh_recv, n)) <= 0)
@@ -791,7 +790,7 @@ static void udp_sock_to_sock(const struct ctx *c, int from_s, int n,
 			= udp_mh_recv[i].msg_len;
 	}
 
-	pif_sockaddr(c, &udp_splice_to, &sl, topif,
+	pif_sockaddr(c, &udp_splice_to, topif,
 		     &toside->eaddr, toside->eport);
 
 	sendmmsg(to_s, udp_mh_splice, n, MSG_NOSIGNAL);
@@ -999,7 +998,6 @@ int udp_tap_handler(const struct ctx *c, uint8_t pif,
 	flow_sidx_t tosidx;
 	in_port_t src, dst;
 	uint8_t topif;
-	socklen_t sl;
 
 	ASSERT(!c->no_udp);
 
@@ -1041,7 +1039,7 @@ int udp_tap_handler(const struct ctx *c, uint8_t pif,
 	s = uflow->s[tosidx.sidei];
 	ASSERT(s >= 0);
 
-	pif_sockaddr(c, &to_sa, &sl, topif, &toside->eaddr, toside->eport);
+	pif_sockaddr(c, &to_sa, topif, &toside->eaddr, toside->eport);
 
 	for (i = 0, j = 0; i < (int)p->count - idx && j < UIO_MAXIOV; i++) {
 		const struct udphdr *uh_send;
@@ -1054,7 +1052,7 @@ int udp_tap_handler(const struct ctx *c, uint8_t pif,
 			return p->count - idx;
 
 		mm[i].msg_hdr.msg_name = &to_sa;
-		mm[i].msg_hdr.msg_namelen = sl;
+		mm[i].msg_hdr.msg_namelen = socklen_inany(&to_sa);
 
 		if (data.cnt) {
 			int cnt;
diff --git a/util.c b/util.c
index ab23463b..7df50c69 100644
--- a/util.c
+++ b/util.c
@@ -44,17 +44,15 @@
  * @c:		Execution context
  * @type:	epoll type
  * @sa:		Socket address to bind to
- * @sl:		Length of @sa
  * @ifname:	Interface for binding, NULL for any
  * @v6only:	Set IPV6_V6ONLY socket option
  *
  * Return: newly created socket, negative error code on failure
  */
 int sock_l4_sa(const struct ctx *c, enum epoll_type type,
-	       const void *sa, socklen_t sl,
-	       const char *ifname, bool v6only)
+	       const union sockaddr_inany *sa, const char *ifname, bool v6only)
 {
-	sa_family_t af = ((const struct sockaddr *)sa)->sa_family;
+	sa_family_t af = sa->sa_family;
 	bool freebind = false;
 	int fd, y = 1, ret;
 	uint8_t proto;
@@ -146,7 +144,7 @@ int sock_l4_sa(const struct ctx *c, enum epoll_type type,
 		}
 	}
 
-	if (bind(fd, sa, sl) < 0) {
+	if (bind(fd, &sa->sa, socklen_inany(sa)) < 0) {
 		/* We'll fail to bind to low ports if we don't have enough
 		 * capabilities, and we'll fail to bind on already bound ports,
 		 * this is fine. This might also fail for ICMP because of a
diff --git a/util.h b/util.h
index a0b2ada6..f8294199 100644
--- a/util.h
+++ b/util.h
@@ -206,10 +206,10 @@ int do_clone(int (*fn)(void *), char *stack_area, size_t stack_size, int flags,
 #include "packet.h"
 
 struct ctx;
+union sockaddr_inany;
 
 int sock_l4_sa(const struct ctx *c, enum epoll_type type,
-	       const void *sa, socklen_t sl,
-	       const char *ifname, bool v6only);
+	       const union sockaddr_inany *sa, const char *ifname, bool v6only);
 int sock_unix(char *sock_path);
 void sock_probe_mem(struct ctx *c);
 long timespec_diff_ms(const struct timespec *a, const struct timespec *b);
-- 
2.51.1


  parent reply	other threads:[~2025-11-19  5:23 UTC|newest]

Thread overview: 14+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-11-19  5:22 [PATCH v4 0/9] Reduce differences between inbound and outbound socket binding David Gibson
2025-11-19  5:22 ` [PATCH v4 1/9] flow: Remove bogus @path field from flowside_sock_args David Gibson
2025-11-19  5:22 ` David Gibson [this message]
2025-11-19  5:22 ` [PATCH v4 3/9] util, flow, pif: Simplify sock_l4_sa() interface David Gibson
2025-11-19  5:22 ` [PATCH v4 4/9] tcp: Merge tcp_ns_sock_init[46]() into tcp_sock_init_one() David Gibson
2025-11-19  5:22 ` [PATCH v4 5/9] udp: Unify some more inbound/outbound parts of udp_sock_init() David Gibson
2025-11-19  5:22 ` [PATCH v4 6/9] udp: Move udp_sock_init() special case to its caller David Gibson
2025-11-19  5:22 ` [PATCH v4 7/9] util: Fix setting of IPV6_V6ONLY socket option David Gibson
2025-11-19  5:22 ` [PATCH v4 8/9] tcp, udp: Remove fallback if creating dual stack socket fails David Gibson
2025-11-19  5:22 ` [PATCH v4 9/9] tcp, udp: Bind outbound listening sockets by interface instead of address David Gibson
2025-11-21  3:56   ` Stefano Brivio
2025-11-21  5:24     ` David Gibson
2025-11-21  5:39       ` Stefano Brivio
2025-11-26  5:42     ` David Gibson

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20251119052257.3004500-3-david@gibson.dropbear.id.au \
    --to=david@gibson.dropbear.id.au \
    --cc=passt-dev@passt.top \
    --cc=sbrivio@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
Code repositories for project(s) associated with this public inbox

	https://passt.top/passt

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for IMAP folder(s).