public inbox for passt-dev@passt.top
 help / color / mirror / code / Atom feed
* [RFC  00/12] Support for multiple address and late binding
@ 2025-12-15  1:54 Jon Maloy
  2025-12-15  1:54 ` [RFC 01/12] ip: Introduce multi-address data structures for IPv4 and IPv6 Jon Maloy
                   ` (11 more replies)
  0 siblings, 12 replies; 25+ messages in thread
From: Jon Maloy @ 2025-12-15  1:54 UTC (permalink / raw)
  To: sbrivio, dgibson, david, jmaloy, passt-dev

This series adds functionality corresponding to three user stories
in https://pad.passt.top/p/NetlinkMonitor:

US-1: Dynamic Network Configuration Updates (PASTA)
US-3: Multiple IPv6 Addresses (PASTA)
US-4: Multiple IPv4 Addresses (PASTA)

Jon Maloy (12):
  ip: Introduce multi-address data structures for IPv4 and IPv6
  ip: Add ip4_default_prefix_len() helper function for class-based
    prefix
  conf: Allow multiple -a/--address options per address family
  conf: Apply -n/--netmask to most recently added address
  fwd: Check all configured addresses in guest accessibility functions
  arp: Check all configured addresses in ARP filtering
  netlink: Subscribe to link/address changes in namespace
  netlink: Subscribe to route changes in namespace
  netlink: Add host-side monitoring for late template interface binding
  netlink: Add host-side route monitoring and propagation
  netlink: Prevent host route events from overwriting guest-configured
    gateway
  netlink: Rename tap interface when late binding discovers template
    name

 arp.c        |  11 +-
 conf.c       | 135 +++++---
 dhcp.c       |   8 +-
 dhcpv6.c     |   6 +-
 epoll_type.h |   4 +
 fwd.c        |  30 +-
 ip.c         |  21 ++
 ip.h         |  28 ++
 isolation.c  |   4 +
 ndp.c        |   6 +-
 netlink.c    | 869 +++++++++++++++++++++++++++++++++++++++++++++++++++
 netlink.h    |   6 +
 passt.c      |   9 +
 passt.h      |  21 +-
 pasta.c      |  28 +-
 tap.c        |  37 ++-
 tap.h        |   1 +
 util.h       |   1 +
 18 files changed, 1137 insertions(+), 88 deletions(-)

-- 
2.51.1


^ permalink raw reply	[flat|nested] 25+ messages in thread

* [RFC  01/12] ip: Introduce multi-address data structures for IPv4 and IPv6
  2025-12-15  1:54 [RFC 00/12] Support for multiple address and late binding Jon Maloy
@ 2025-12-15  1:54 ` Jon Maloy
  2025-12-15  9:40   ` David Gibson
  2025-12-15  9:46   ` David Gibson
  2025-12-15  1:54 ` [RFC 02/12] ip: Add ip4_default_prefix_len() helper function for class-based prefix Jon Maloy
                   ` (10 subsequent siblings)
  11 siblings, 2 replies; 25+ messages in thread
From: Jon Maloy @ 2025-12-15  1:54 UTC (permalink / raw)
  To: sbrivio, dgibson, david, jmaloy, passt-dev

As preparation for supporting multiple addresses per interface, we
replace the single addr/prefix_len fields with arrays.

- We add an ip4_addr_entry and an ip6_addr_entry struct containing
  address and prefix length.

- We set the array sizes to IP4_MAX_ADDRS=8 and IP6_MAX_ADDRS=16,
  respectively.

The only functional change is that the IPv6 prefix length is now
properly stored instead of being hardcoded to 64 even when set
via the -a option.

Signed-off-by: Jon Maloy <jmaloy@redhat.com>
---
 arp.c    |  4 +--
 conf.c   | 97 +++++++++++++++++++++++++++++++++-----------------------
 dhcp.c   |  8 ++---
 dhcpv6.c |  6 ++--
 fwd.c    | 12 +++----
 ip.h     | 26 +++++++++++++++
 ndp.c    |  6 ++--
 passt.h  | 16 ++++++----
 pasta.c  | 12 ++++---
 tap.c    |  4 +--
 util.h   |  1 +
 11 files changed, 122 insertions(+), 70 deletions(-)

diff --git a/arp.c b/arp.c
index bb042e9..7eaf517 100644
--- a/arp.c
+++ b/arp.c
@@ -54,7 +54,7 @@ static bool ignore_arp(const struct ctx *c,
 		return true;
 
 	/* Don't resolve the guest's assigned address, either. */
-	if (!memcmp(am->tip, &c->ip4.addr, sizeof(am->tip)))
+	if (!memcmp(am->tip, &c->ip4.addrs[0].addr, sizeof(am->tip)))
 		return true;
 
 	return false;
@@ -145,7 +145,7 @@ void arp_send_init_req(const struct ctx *c)
 	memcpy(req.am.sha,	c->our_tap_mac,		sizeof(req.am.sha));
 	memcpy(req.am.sip,	&c->ip4.our_tap_addr,	sizeof(req.am.sip));
 	memcpy(req.am.tha,	MAC_BROADCAST,		sizeof(req.am.tha));
-	memcpy(req.am.tip,	&c->ip4.addr,		sizeof(req.am.tip));
+	memcpy(req.am.tip,	&c->ip4.addrs[0].addr,	sizeof(req.am.tip));
 
 	debug("Sending initial ARP request for guest MAC address");
 	tap_send_single(c, &req, sizeof(req));
diff --git a/conf.c b/conf.c
index fdc19e8..0e96f36 100644
--- a/conf.c
+++ b/conf.c
@@ -694,10 +694,12 @@ static int conf_ip4_prefix(const char *arg)
  * conf_ip4() - Verify or detect IPv4 support, get relevant addresses
  * @ifi:	Host interface to attempt (0 to determine one)
  * @ip4:	IPv4 context (will be written)
+ * @permanent:	Mark configured addresses as permanent
  *
  * Return: interface index for IPv4, or 0 on failure.
  */
-static unsigned int conf_ip4(unsigned int ifi, struct ip4_ctx *ip4)
+static unsigned int conf_ip4(unsigned int ifi, struct ip4_ctx *ip4,
+			     bool permanent)
 {
 	if (!ifi)
 		ifi = nl_get_ext_if(nl_sock, AF_INET);
@@ -717,33 +719,38 @@ static unsigned int conf_ip4(unsigned int ifi, struct ip4_ctx *ip4)
 		}
 	}
 
-	if (IN4_IS_ADDR_UNSPECIFIED(&ip4->addr)) {
+	if (!ip4->addr_count) {
 		int rc = nl_addr_get(nl_sock, ifi, AF_INET,
-				     &ip4->addr, &ip4->prefix_len, NULL);
+				     &ip4->addrs[0].addr,
+				     &ip4->addrs[0].prefix_len, NULL);
 		if (rc < 0) {
 			debug("Couldn't discover IPv4 address: %s",
 			      strerror_(-rc));
 			return 0;
 		}
+		ip4->addrs[0].permanent = permanent;
+		ip4->addr_count = 1;
 	}
 
-	if (!ip4->prefix_len) {
-		in_addr_t addr = ntohl(ip4->addr.s_addr);
-		if (IN_CLASSA(addr))
-			ip4->prefix_len = (32 - IN_CLASSA_NSHIFT);
-		else if (IN_CLASSB(addr))
-			ip4->prefix_len = (32 - IN_CLASSB_NSHIFT);
-		else if (IN_CLASSC(addr))
-			ip4->prefix_len = (32 - IN_CLASSC_NSHIFT);
+	/* Apply default prefix_len to first address if not set */
+	if (!ip4->addrs[0].prefix_len) {
+		in_addr_t a = ntohl(ip4->addrs[0].addr.s_addr);
+
+		if (IN_CLASSA(a))
+			ip4->addrs[0].prefix_len = 8;
+		else if (IN_CLASSB(a))
+			ip4->addrs[0].prefix_len = 16;
+		else if (IN_CLASSC(a))
+			ip4->addrs[0].prefix_len = 24;
 		else
-			ip4->prefix_len = 32;
+			ip4->addrs[0].prefix_len = 32;
 	}
 
-	ip4->addr_seen = ip4->addr;
+	ip4->addr_seen = ip4->addrs[0].addr;
 
 	ip4->our_tap_addr = ip4->guest_gw;
 
-	if (IN4_IS_ADDR_UNSPECIFIED(&ip4->addr))
+	if (IN4_IS_ADDR_UNSPECIFIED(&ip4->addrs[0].addr))
 		return 0;
 
 	return ifi;
@@ -755,9 +762,9 @@ static unsigned int conf_ip4(unsigned int ifi, struct ip4_ctx *ip4)
  */
 static void conf_ip4_local(struct ip4_ctx *ip4)
 {
-	ip4->addr_seen = ip4->addr = IP4_LL_GUEST_ADDR;
+	ip4->addr_seen = ip4->addrs[0].addr = IP4_LL_GUEST_ADDR;
 	ip4->our_tap_addr = ip4->guest_gw = IP4_LL_GUEST_GW;
-	ip4->prefix_len = IP4_LL_PREFIX_LEN;
+	ip4->addrs[0].prefix_len = IP4_LL_PREFIX_LEN;
 
 	ip4->no_copy_addrs = ip4->no_copy_routes = true;
 }
@@ -766,10 +773,12 @@ static void conf_ip4_local(struct ip4_ctx *ip4)
  * conf_ip6() - Verify or detect IPv6 support, get relevant addresses
  * @ifi:	Host interface to attempt (0 to determine one)
  * @ip6:	IPv6 context (will be written)
+ * @permanent:	Mark discovered addresses as permanent
  *
  * Return: interface index for IPv6, or 0 on failure.
  */
-static unsigned int conf_ip6(unsigned int ifi, struct ip6_ctx *ip6)
+static unsigned int conf_ip6(unsigned int ifi, struct ip6_ctx *ip6,
+			     bool permanent)
 {
 	int prefix_len = 0;
 	int rc;
@@ -792,19 +801,25 @@ static unsigned int conf_ip6(unsigned int ifi, struct ip6_ctx *ip6)
 	}
 
 	rc = nl_addr_get(nl_sock, ifi, AF_INET6,
-			 IN6_IS_ADDR_UNSPECIFIED(&ip6->addr) ? &ip6->addr : NULL,
+			 ip6->addr_count ? NULL : &ip6->addrs[0].addr,
 			 &prefix_len, &ip6->our_tap_ll);
 	if (rc < 0) {
 		debug("Couldn't discover IPv6 address: %s", strerror_(-rc));
 		return 0;
 	}
 
-	ip6->addr_seen = ip6->addr;
+	if (!ip6->addr_count) {
+		ip6->addrs[0].prefix_len = prefix_len ? prefix_len : 64;
+		ip6->addrs[0].permanent = permanent;
+		ip6->addr_count = 1;
+	}
+
+	ip6->addr_seen = ip6->addrs[0].addr;
 
 	if (IN6_IS_ADDR_LINKLOCAL(&ip6->guest_gw))
 		ip6->our_tap_ll = ip6->guest_gw;
 
-	if (IN6_IS_ADDR_UNSPECIFIED(&ip6->addr) ||
+	if (IN6_IS_ADDR_UNSPECIFIED(&ip6->addrs[0].addr) ||
 	    IN6_IS_ADDR_UNSPECIFIED(&ip6->our_tap_ll))
 		return 0;
 
@@ -1149,11 +1164,13 @@ static void conf_print(const struct ctx *c)
 		if (!c->no_dhcp) {
 			uint32_t mask;
 
-			mask = htonl(0xffffffff << (32 - c->ip4.prefix_len));
+			mask = htonl(0xffffffff <<
+				     (32 - c->ip4.addrs[0].prefix_len));
 
 			info("DHCP:");
 			info("    assign: %s",
-			     inet_ntop(AF_INET, &c->ip4.addr, buf4, sizeof(buf4)));
+			     inet_ntop(AF_INET, &c->ip4.addrs[0].addr,
+				       buf4, sizeof(buf4)));
 			info("    mask: %s",
 			     inet_ntop(AF_INET, &mask,        buf4, sizeof(buf4)));
 			info("    router: %s",
@@ -1191,7 +1208,8 @@ static void conf_print(const struct ctx *c)
 			goto dns6;
 
 		info("    assign: %s",
-		     inet_ntop(AF_INET6, &c->ip6.addr, buf6, sizeof(buf6)));
+		     inet_ntop(AF_INET6, &c->ip6.addrs[0].addr,
+			       buf6, sizeof(buf6)));
 		info("    router: %s",
 		     inet_ntop(AF_INET6, &c->ip6.guest_gw, buf6, sizeof(buf6)));
 		info("    our link-local: %s",
@@ -1812,22 +1830,23 @@ void conf(struct ctx *c, int argc, char **argv)
 			break;
 		}
 		case 'a':
-			if (inet_pton(AF_INET6, optarg, &c->ip6.addr)	&&
-			    !IN6_IS_ADDR_UNSPECIFIED(&c->ip6.addr)	&&
-			    !IN6_IS_ADDR_LOOPBACK(&c->ip6.addr)		&&
-			    !IN6_IS_ADDR_V4MAPPED(&c->ip6.addr)		&&
-			    !IN6_IS_ADDR_V4COMPAT(&c->ip6.addr)		&&
-			    !IN6_IS_ADDR_MULTICAST(&c->ip6.addr)) {
+			if (inet_pton(AF_INET6, optarg,
+				      &c->ip6.addrs[0].addr) &&
+			    !IN6_IS_ADDR_UNSPECIFIED(&c->ip6.addrs[0].addr) &&
+			    !IN6_IS_ADDR_LOOPBACK(&c->ip6.addrs[0].addr) &&
+			    !IN6_IS_ADDR_V4MAPPED(&c->ip6.addrs[0].addr) &&
+			    !IN6_IS_ADDR_V4COMPAT(&c->ip6.addrs[0].addr) &&
+			    !IN6_IS_ADDR_MULTICAST(&c->ip6.addrs[0].addr)) {
 				if (c->mode == MODE_PASTA)
 					c->ip6.no_copy_addrs = true;
 				break;
 			}
 
-			if (inet_pton(AF_INET, optarg, &c->ip4.addr)	&&
-			    !IN4_IS_ADDR_UNSPECIFIED(&c->ip4.addr)	&&
-			    !IN4_IS_ADDR_BROADCAST(&c->ip4.addr)	&&
-			    !IN4_IS_ADDR_LOOPBACK(&c->ip4.addr)		&&
-			    !IN4_IS_ADDR_MULTICAST(&c->ip4.addr)) {
+			if (inet_pton(AF_INET, optarg, &c->ip4.addrs[0].addr) &&
+			    !IN4_IS_ADDR_UNSPECIFIED(&c->ip4.addrs[0].addr) &&
+			    !IN4_IS_ADDR_BROADCAST(&c->ip4.addrs[0].addr) &&
+			    !IN4_IS_ADDR_LOOPBACK(&c->ip4.addrs[0].addr) &&
+			    !IN4_IS_ADDR_MULTICAST(&c->ip4.addrs[0].addr)) {
 				if (c->mode == MODE_PASTA)
 					c->ip4.no_copy_addrs = true;
 				break;
@@ -1836,8 +1855,8 @@ void conf(struct ctx *c, int argc, char **argv)
 			die("Invalid address: %s", optarg);
 			break;
 		case 'n':
-			c->ip4.prefix_len = conf_ip4_prefix(optarg);
-			if (c->ip4.prefix_len < 0)
+			c->ip4.addrs[0].prefix_len = conf_ip4_prefix(optarg);
+			if (c->ip4.addrs[0].prefix_len < 0)
 				die("Invalid netmask: %s", optarg);
 
 			break;
@@ -1984,9 +2003,9 @@ void conf(struct ctx *c, int argc, char **argv)
 
 	nl_sock_init(c, false);
 	if (!v6_only)
-		c->ifi4 = conf_ip4(ifi4, &c->ip4);
+		c->ifi4 = conf_ip4(ifi4, &c->ip4, c->pasta_conf_ns);
 	if (!v4_only)
-		c->ifi6 = conf_ip6(ifi6, &c->ip6);
+		c->ifi6 = conf_ip6(ifi6, &c->ip6, c->pasta_conf_ns);
 
 	if (c->ifi4 && c->mtu < IPV4_MIN_MTU) {
 		warn("MTU %"PRIu16" is too small for IPv4 (minimum %u)",
@@ -2125,7 +2144,7 @@ void conf(struct ctx *c, int argc, char **argv)
 	if (!c->ifi6) {
 		c->no_ndp = 1;
 		c->no_dhcpv6 = 1;
-	} else if (IN6_IS_ADDR_UNSPECIFIED(&c->ip6.addr)) {
+	} else if (IN6_IS_ADDR_UNSPECIFIED(&c->ip6.addrs[0].addr)) {
 		c->no_dhcpv6 = 1;
 	}
 
diff --git a/dhcp.c b/dhcp.c
index 6b9c2e3..46ef8e3 100644
--- a/dhcp.c
+++ b/dhcp.c
@@ -352,7 +352,7 @@ int dhcp(const struct ctx *c, struct iov_tail *data)
 	reply.secs		= 0;
 	reply.flags		= m->flags;
 	reply.ciaddr		= m->ciaddr;
-	reply.yiaddr		= c->ip4.addr;
+	reply.yiaddr		= c->ip4.addrs[0].addr;
 	reply.siaddr		= 0;
 	reply.giaddr		= m->giaddr;
 	memcpy(&reply.chaddr,	m->chaddr,	sizeof(reply.chaddr));
@@ -404,7 +404,7 @@ int dhcp(const struct ctx *c, struct iov_tail *data)
 
 	info("    from %s", eth_ntop(m->chaddr, macstr, sizeof(macstr)));
 
-	mask.s_addr = htonl(0xffffffff << (32 - c->ip4.prefix_len));
+	mask.s_addr = htonl(0xffffffff << (32 - c->ip4.addrs[0].prefix_len));
 	memcpy(opts[1].s,  &mask,                sizeof(mask));
 	memcpy(opts[3].s,  &c->ip4.guest_gw,     sizeof(c->ip4.guest_gw));
 	memcpy(opts[54].s, &c->ip4.our_tap_addr, sizeof(c->ip4.our_tap_addr));
@@ -412,7 +412,7 @@ int dhcp(const struct ctx *c, struct iov_tail *data)
 	/* If the gateway is not on the assigned subnet, send an option 121
 	 * (Classless Static Routing) adding a dummy route to it.
 	 */
-	if ((c->ip4.addr.s_addr & mask.s_addr)
+	if ((c->ip4.addrs[0].addr.s_addr & mask.s_addr)
 	    != (c->ip4.guest_gw.s_addr & mask.s_addr)) {
 		/* a.b.c.d/32:0.0.0.0, 0:a.b.c.d */
 		opts[121].slen = 14;
@@ -469,7 +469,7 @@ int dhcp(const struct ctx *c, struct iov_tail *data)
 	if (m->flags & FLAG_BROADCAST)
 		dst = in4addr_broadcast;
 	else
-		dst = c->ip4.addr;
+		dst = c->ip4.addrs[0].addr;
 
 	tap_udp4_send(c, c->ip4.our_tap_addr, 67, dst, 68, &reply, dlen);
 
diff --git a/dhcpv6.c b/dhcpv6.c
index e4df0db..7eae6a1 100644
--- a/dhcpv6.c
+++ b/dhcpv6.c
@@ -625,7 +625,7 @@ int dhcpv6(struct ctx *c, struct iov_tail *data,
 		if (mh->type == TYPE_CONFIRM && server_id)
 			return -1;
 
-		if (dhcpv6_ia_notonlink(data, &c->ip6.addr)) {
+		if (dhcpv6_ia_notonlink(data, &c->ip6.addrs[0].addr)) {
 
 			dhcpv6_send_ia_notonlink(c, data, &client_id_base,
 						 ntohs(client_id->l), mh->xid);
@@ -679,7 +679,7 @@ int dhcpv6(struct ctx *c, struct iov_tail *data,
 
 	tap_udp6_send(c, src, 547, tap_ip6_daddr(c, src), 546,
 		      mh->xid, &resp, n);
-	c->ip6.addr_seen = c->ip6.addr;
+	c->ip6.addr_seen = c->ip6.addrs[0].addr;
 
 	return 1;
 }
@@ -703,5 +703,5 @@ void dhcpv6_init(const struct ctx *c)
 	memcpy(resp_not_on_link.server_id.duid_lladdr,
 	       c->our_tap_mac, sizeof(c->our_tap_mac));
 
-	resp.ia_addr.addr	= c->ip6.addr;
+	resp.ia_addr.addr	= c->ip6.addrs[0].addr;
 }
diff --git a/fwd.c b/fwd.c
index 44a0e10..408af30 100644
--- a/fwd.c
+++ b/fwd.c
@@ -516,7 +516,7 @@ static bool fwd_guest_accessible4(const struct ctx *c,
 	/* For IPv4, addr_seen is initialised to addr, so is always a valid
 	 * address
 	 */
-	if (IN4_ARE_ADDR_EQUAL(addr, &c->ip4.addr) ||
+	if (IN4_ARE_ADDR_EQUAL(addr, &c->ip4.addrs[0].addr) ||
 	    IN4_ARE_ADDR_EQUAL(addr, &c->ip4.addr_seen))
 		return false;
 
@@ -537,7 +537,7 @@ static bool fwd_guest_accessible6(const struct ctx *c,
 	if (IN6_IS_ADDR_LOOPBACK(addr))
 		return false;
 
-	if (IN6_ARE_ADDR_EQUAL(addr, &c->ip6.addr))
+	if (IN6_ARE_ADDR_EQUAL(addr, &c->ip6.addrs[0].addr))
 		return false;
 
 	/* For IPv6, addr_seen starts unspecified, because we don't know what LL
@@ -587,9 +587,9 @@ static void nat_outbound(const struct ctx *c, const union inany_addr *addr,
 	else if (inany_equals6(addr, &c->ip6.map_host_loopback))
 		*translated = inany_loopback6;
 	else if (inany_equals4(addr, &c->ip4.map_guest_addr))
-		*translated = inany_from_v4(c->ip4.addr);
+		*translated = inany_from_v4(c->ip4.addrs[0].addr);
 	else if (inany_equals6(addr, &c->ip6.map_guest_addr))
-		translated->a6 = c->ip6.addr;
+		translated->a6 = c->ip6.addrs[0].addr;
 	else
 		*translated = *addr;
 }
@@ -710,10 +710,10 @@ bool nat_inbound(const struct ctx *c, const union inany_addr *addr,
 		   inany_equals6(addr, &in6addr_loopback)) {
 		translated->a6 = c->ip6.map_host_loopback;
 	} else if (!IN4_IS_ADDR_UNSPECIFIED(&c->ip4.map_guest_addr) &&
-		   inany_equals4(addr, &c->ip4.addr)) {
+		   inany_equals4(addr, &c->ip4.addrs[0].addr)) {
 		*translated = inany_from_v4(c->ip4.map_guest_addr);
 	} else if (!IN6_IS_ADDR_UNSPECIFIED(&c->ip6.map_guest_addr) &&
-		   inany_equals6(addr, &c->ip6.addr)) {
+		   inany_equals6(addr, &c->ip6.addrs[0].addr)) {
 		translated->a6 = c->ip6.map_guest_addr;
 	} else if (fwd_guest_accessible(c, addr)) {
 		*translated = *addr;
diff --git a/ip.h b/ip.h
index 5830b92..748cb1f 100644
--- a/ip.h
+++ b/ip.h
@@ -135,4 +135,30 @@ static const struct in_addr in4addr_broadcast = { 0xffffffff };
 #define IPV6_MIN_MTU		1280
 #endif
 
+/* Maximum number of addresses per address family */
+#define IP4_MAX_ADDRS		8
+#define IP6_MAX_ADDRS		16
+
+/**
+ * struct ip4_addr_entry - IPv4 address with prefix length
+ * @addr:		IPv4 address
+ * @prefix_len:		Prefix length (netmask bits)
+ */
+struct ip4_addr_entry {
+	struct in_addr addr;
+	int prefix_len;
+	int permanent;
+};
+
+/**
+ * struct ip6_addr_entry - IPv6 address with prefix length
+ * @addr:		IPv6 address
+ * @prefix_len:		Prefix length
+ */
+struct ip6_addr_entry {
+	struct in6_addr addr;
+	int prefix_len;
+	int permanent;
+};
+
 #endif /* IP_H */
diff --git a/ndp.c b/ndp.c
index eb9e313..868a234 100644
--- a/ndp.c
+++ b/ndp.c
@@ -257,7 +257,7 @@ static void ndp_ra(const struct ctx *c, const struct in6_addr *dst)
 			.valid_lifetime		= ~0U,
 			.pref_lifetime		= ~0U,
 		},
-		.prefix = c->ip6.addr,
+		.prefix = c->ip6.addrs[0].addr,
 		.source_ll = {
 			.header = {
 				.type		= OPT_SRC_L2_ADDR,
@@ -466,8 +466,8 @@ void ndp_send_init_req(const struct ctx *c)
 			.icmp6_solicited	= 0, /* Reserved */
 			.icmp6_override		= 0, /* Reserved */
 		},
-		.target_addr = c->ip6.addr
+		.target_addr = c->ip6.addrs[0].addr
 	};
 	debug("Sending initial NDP NS request for guest MAC address");
-	ndp_send(c, &c->ip6.addr, &ns, sizeof(ns));
+	ndp_send(c, &c->ip6.addrs[0].addr, &ns, sizeof(ns));
 }
diff --git a/passt.h b/passt.h
index 79d01dd..533f2cb 100644
--- a/passt.h
+++ b/passt.h
@@ -66,9 +66,9 @@ enum passt_modes {
 
 /**
  * struct ip4_ctx - IPv4 execution context
- * @addr:		IPv4 address assigned to guest
+ * @addrs:		IPv4 addresses assigned to guest
+ * @addr_count:		Number of addresses in addrs[] array
  * @addr_seen:		Latest IPv4 address seen as source from tap
- * @prefixlen:		IPv4 prefix length (netmask)
  * @guest_gw:		IPv4 gateway as seen by the guest
  * @map_host_loopback:	Outbound connections to this address are NATted to the
  *                      host's 127.0.0.1
@@ -85,9 +85,10 @@ enum passt_modes {
  */
 struct ip4_ctx {
 	/* PIF_TAP addresses */
-	struct in_addr addr;
+	struct ip4_addr_entry addrs[IP4_MAX_ADDRS];
+	int addr_count;
+
 	struct in_addr addr_seen;
-	int prefix_len;
 	struct in_addr guest_gw;
 	struct in_addr map_host_loopback;
 	struct in_addr map_guest_addr;
@@ -107,7 +108,8 @@ struct ip4_ctx {
 
 /**
  * struct ip6_ctx - IPv6 execution context
- * @addr:		IPv6 address assigned to guest
+ * @addrs:		IPv6 addresses assigned to guest
+ * @addr_count:		Number of addresses in addrs[] array
  * @addr_seen:		Latest IPv6 global/site address seen as source from tap
  * @addr_ll_seen:	Latest IPv6 link-local address seen as source from tap
  * @guest_gw:		IPv6 gateway as seen by the guest
@@ -126,7 +128,9 @@ struct ip4_ctx {
  */
 struct ip6_ctx {
 	/* PIF_TAP addresses */
-	struct in6_addr addr;
+	struct ip6_addr_entry addrs[IP6_MAX_ADDRS];
+	int addr_count;
+
 	struct in6_addr addr_seen;
 	struct in6_addr addr_ll_seen;
 	struct in6_addr guest_gw;
diff --git a/pasta.c b/pasta.c
index 674b554..49b393c 100644
--- a/pasta.c
+++ b/pasta.c
@@ -331,8 +331,8 @@ void pasta_ns_conf(struct ctx *c)
 			if (c->ip4.no_copy_addrs) {
 				rc = nl_addr_set(nl_sock_ns, c->pasta_ifi,
 						 AF_INET,
-						 &c->ip4.addr,
-						 c->ip4.prefix_len);
+						 &c->ip4.addrs[0].addr,
+						 c->ip4.addrs[0].prefix_len);
 			} else {
 				rc = nl_addr_dup(nl_sock, c->ifi4,
 						 nl_sock_ns, c->pasta_ifi,
@@ -378,10 +378,12 @@ void pasta_ns_conf(struct ctx *c)
 					  0, IFF_NOARP);
 
 			if (c->ip6.no_copy_addrs) {
-				if (!IN6_IS_ADDR_UNSPECIFIED(&c->ip6.addr)) {
+				struct in6_addr *a = &c->ip6.addrs[0].addr;
+
+				if (!IN6_IS_ADDR_UNSPECIFIED(a)) {
 					rc = nl_addr_set(nl_sock_ns,
-							 c->pasta_ifi, AF_INET6,
-							 &c->ip6.addr, 64);
+							 c->pasta_ifi,
+							 AF_INET6, a, 64);
 				}
 			} else {
 				rc = nl_addr_dup(nl_sock, c->ifi6,
diff --git a/tap.c b/tap.c
index e3ea61c..0b96cc1 100644
--- a/tap.c
+++ b/tap.c
@@ -951,8 +951,8 @@ resume:
 				c->ip6.addr_seen = *saddr;
 			}
 
-			if (IN6_IS_ADDR_UNSPECIFIED(&c->ip6.addr))
-				c->ip6.addr = *saddr;
+			if (IN6_IS_ADDR_UNSPECIFIED(&c->ip6.addrs[0].addr))
+				c->ip6.addrs[0].addr = *saddr;
 		} else if (!IN6_IS_ADDR_UNSPECIFIED(saddr)){
 			c->ip6.addr_seen = *saddr;
 		}
diff --git a/util.h b/util.h
index f7a941f..4273e0d 100644
--- a/util.h
+++ b/util.h
@@ -401,4 +401,5 @@ static inline int wrap_getsockname(int sockfd, struct sockaddr *addr,
 #define PASST_MAXDNAME 254 /* 253 (RFC 1035) + 1 (the terminator) */
 void encode_domain_name(char *buf, const char *domain_name);
 
+
 #endif /* UTIL_H */
-- 
2.51.1


^ permalink raw reply	[flat|nested] 25+ messages in thread

* [RFC  02/12] ip: Add ip4_default_prefix_len() helper function for class-based prefix
  2025-12-15  1:54 [RFC 00/12] Support for multiple address and late binding Jon Maloy
  2025-12-15  1:54 ` [RFC 01/12] ip: Introduce multi-address data structures for IPv4 and IPv6 Jon Maloy
@ 2025-12-15  1:54 ` Jon Maloy
  2025-12-15  9:41   ` David Gibson
  2025-12-15  1:54 ` [RFC 03/12] conf: Allow multiple -a/--address options per address family Jon Maloy
                   ` (9 subsequent siblings)
  11 siblings, 1 reply; 25+ messages in thread
From: Jon Maloy @ 2025-12-15  1:54 UTC (permalink / raw)
  To: sbrivio, dgibson, david, jmaloy, passt-dev

We add a helper function to calculate the default IPv4 prefix length
based on address class. This replaces the current inline
calculation in conf_ip4(), and also prepares for further uses
of this functionality in the coming commits.

Signed-off-by: Jon Maloy <jmaloy@redhat.com>
---
 conf.c | 15 +++------------
 ip.c   | 21 +++++++++++++++++++++
 ip.h   |  2 ++
 3 files changed, 26 insertions(+), 12 deletions(-)

diff --git a/conf.c b/conf.c
index 0e96f36..31acc20 100644
--- a/conf.c
+++ b/conf.c
@@ -733,18 +733,9 @@ static unsigned int conf_ip4(unsigned int ifi, struct ip4_ctx *ip4,
 	}
 
 	/* Apply default prefix_len to first address if not set */
-	if (!ip4->addrs[0].prefix_len) {
-		in_addr_t a = ntohl(ip4->addrs[0].addr.s_addr);
-
-		if (IN_CLASSA(a))
-			ip4->addrs[0].prefix_len = 8;
-		else if (IN_CLASSB(a))
-			ip4->addrs[0].prefix_len = 16;
-		else if (IN_CLASSC(a))
-			ip4->addrs[0].prefix_len = 24;
-		else
-			ip4->addrs[0].prefix_len = 32;
-	}
+	if (!ip4->addrs[0].prefix_len)
+		ip4->addrs[0].prefix_len =
+			ip4_default_prefix_len(&ip4->addrs[0].addr);
 
 	ip4->addr_seen = ip4->addrs[0].addr;
 
diff --git a/ip.c b/ip.c
index 9a7f4c5..2519c71 100644
--- a/ip.c
+++ b/ip.c
@@ -13,6 +13,8 @@
  */
 
 #include <stddef.h>
+#include <netinet/in.h>
+
 #include "util.h"
 #include "ip.h"
 
@@ -67,3 +69,22 @@ found:
 	*proto = nh;
 	return true;
 }
+
+/**
+ * ip4_default_prefix_len() - Get default prefix length for IPv4 address
+ * @addr:	IPv4 address
+ *
+ * Return: prefix length based on address class (8/16/24), or 32 for other
+ */
+int ip4_default_prefix_len(const struct in_addr *addr)
+{
+	in_addr_t a = ntohl(addr->s_addr);
+
+	if (IN_CLASSA(a))
+		return 8;
+	if (IN_CLASSB(a))
+		return 16;
+	if (IN_CLASSC(a))
+		return 24;
+	return 32;
+}
diff --git a/ip.h b/ip.h
index 748cb1f..065b78b 100644
--- a/ip.h
+++ b/ip.h
@@ -139,6 +139,8 @@ static const struct in_addr in4addr_broadcast = { 0xffffffff };
 #define IP4_MAX_ADDRS		8
 #define IP6_MAX_ADDRS		16
 
+int ip4_default_prefix_len(const struct in_addr *addr);
+
 /**
  * struct ip4_addr_entry - IPv4 address with prefix length
  * @addr:		IPv4 address
-- 
2.51.1


^ permalink raw reply	[flat|nested] 25+ messages in thread

* [RFC  03/12] conf: Allow multiple -a/--address options per address family
  2025-12-15  1:54 [RFC 00/12] Support for multiple address and late binding Jon Maloy
  2025-12-15  1:54 ` [RFC 01/12] ip: Introduce multi-address data structures for IPv4 and IPv6 Jon Maloy
  2025-12-15  1:54 ` [RFC 02/12] ip: Add ip4_default_prefix_len() helper function for class-based prefix Jon Maloy
@ 2025-12-15  1:54 ` Jon Maloy
  2025-12-15  9:53   ` David Gibson
  2025-12-15  1:54 ` [RFC 04/12] conf: Apply -n/--netmask to most recently added address Jon Maloy
                   ` (8 subsequent siblings)
  11 siblings, 1 reply; 25+ messages in thread
From: Jon Maloy @ 2025-12-15  1:54 UTC (permalink / raw)
  To: sbrivio, dgibson, david, jmaloy, passt-dev

We enable configuration of multiple IPv4 and IPv6 addresses by allowing
repeated use of the -a/--address option.

- We update option parsing to append addresses to the addrs[] array.

- Each IPv4 address specified via -a initially gets a class-based default
  prefix; each IPv6 address initially gets a prefix length of 64.

- If no -a option is given, address and prefix are inherited from
  the template interface, just like now.

- The -n/--netmask option applies only to the first address, in addrs[0].

- We configure all indicated addresses in the namespace interface.

Signed-off-by: Jon Maloy <jmaloy@redhat.com>
---
 conf.c  | 74 ++++++++++++++++++++++++++++++++++++++-------------------
 pasta.c | 24 ++++++++++++++-----
 2 files changed, 68 insertions(+), 30 deletions(-)

diff --git a/conf.c b/conf.c
index 31acc20..e9f217b 100644
--- a/conf.c
+++ b/conf.c
@@ -741,7 +741,7 @@ static unsigned int conf_ip4(unsigned int ifi, struct ip4_ctx *ip4,
 
 	ip4->our_tap_addr = ip4->guest_gw;
 
-	if (IN4_IS_ADDR_UNSPECIFIED(&ip4->addrs[0].addr))
+	if (!ip4->addr_count)
 		return 0;
 
 	return ifi;
@@ -756,6 +756,7 @@ static void conf_ip4_local(struct ip4_ctx *ip4)
 	ip4->addr_seen = ip4->addrs[0].addr = IP4_LL_GUEST_ADDR;
 	ip4->our_tap_addr = ip4->guest_gw = IP4_LL_GUEST_GW;
 	ip4->addrs[0].prefix_len = IP4_LL_PREFIX_LEN;
+	ip4->addr_count = 1;
 
 	ip4->no_copy_addrs = ip4->no_copy_routes = true;
 }
@@ -810,8 +811,7 @@ static unsigned int conf_ip6(unsigned int ifi, struct ip6_ctx *ip6,
 	if (IN6_IS_ADDR_LINKLOCAL(&ip6->guest_gw))
 		ip6->our_tap_ll = ip6->guest_gw;
 
-	if (IN6_IS_ADDR_UNSPECIFIED(&ip6->addrs[0].addr) ||
-	    IN6_IS_ADDR_UNSPECIFIED(&ip6->our_tap_ll))
+	if (!ip6->addr_count || IN6_IS_ADDR_UNSPECIFIED(&ip6->our_tap_ll))
 		return 0;
 
 	return ifi;
@@ -903,9 +903,11 @@ static void usage(const char *name, FILE *f, int status)
 		"    default: 65520: maximum 802.3 MTU minus 802.3 header\n"
 		"                    length, rounded to 32 bits (IPv4 words)\n"
 		"  -a, --address ADDR	Assign IPv4 or IPv6 address ADDR\n"
-		"    can be specified zero to two times (for IPv4 and IPv6)\n"
+		"    can be specified multiple times (limit: %d IPv4, %d IPv6)\n"
 		"    default: use addresses from interface with default route\n"
-		"  -n, --netmask MASK	Assign IPv4 MASK, dot-decimal or bits\n"
+		"  -n, --netmask MASK	Assign IPv4 MASK, dot-decimal or bits\n",
+		IP4_MAX_ADDRS, IP6_MAX_ADDRS);
+	FPRINTF(f,
 		"    default: netmask from matching address on the host\n"
 		"  -M, --mac-addr ADDR	Use source MAC address ADDR\n"
 		"    default: 9a:55:9a:55:9a:55 (locally administered)\n"
@@ -1159,9 +1161,11 @@ static void conf_print(const struct ctx *c)
 				     (32 - c->ip4.addrs[0].prefix_len));
 
 			info("DHCP:");
-			info("    assign: %s",
-			     inet_ntop(AF_INET, &c->ip4.addrs[0].addr,
-				       buf4, sizeof(buf4)));
+			for (i = 0; i < (int)c->ip4.addr_count; i++) {
+				info("    assign: %s",
+				     inet_ntop(AF_INET, &c->ip4.addrs[i].addr,
+					       buf4, sizeof(buf4)));
+			}
 			info("    mask: %s",
 			     inet_ntop(AF_INET, &mask,        buf4, sizeof(buf4)));
 			info("    router: %s",
@@ -1198,9 +1202,11 @@ static void conf_print(const struct ctx *c)
 		else
 			goto dns6;
 
-		info("    assign: %s",
-		     inet_ntop(AF_INET6, &c->ip6.addrs[0].addr,
-			       buf6, sizeof(buf6)));
+		for (i = 0; i < (int)c->ip6.addr_count; i++) {
+			info("    assign: %s",
+			     inet_ntop(AF_INET6, &c->ip6.addrs[i].addr,
+				       buf6, sizeof(buf6)));
+		}
 		info("    router: %s",
 		     inet_ntop(AF_INET6, &c->ip6.guest_gw, buf6, sizeof(buf6)));
 		info("    our link-local: %s",
@@ -1517,6 +1523,8 @@ void conf(struct ctx *c, int argc, char **argv)
 	struct fqdn *dnss = c->dns_search;
 	unsigned int ifi4 = 0, ifi6 = 0;
 	const char *logfile = NULL;
+	struct in6_addr addr6;
+	struct in_addr addr4;
 	size_t logsize = 0;
 	char *runas = NULL;
 	long fd_tap_opt;
@@ -1821,23 +1829,41 @@ void conf(struct ctx *c, int argc, char **argv)
 			break;
 		}
 		case 'a':
-			if (inet_pton(AF_INET6, optarg,
-				      &c->ip6.addrs[0].addr) &&
-			    !IN6_IS_ADDR_UNSPECIFIED(&c->ip6.addrs[0].addr) &&
-			    !IN6_IS_ADDR_LOOPBACK(&c->ip6.addrs[0].addr) &&
-			    !IN6_IS_ADDR_V4MAPPED(&c->ip6.addrs[0].addr) &&
-			    !IN6_IS_ADDR_V4COMPAT(&c->ip6.addrs[0].addr) &&
-			    !IN6_IS_ADDR_MULTICAST(&c->ip6.addrs[0].addr)) {
+			if (inet_pton(AF_INET6, optarg, &addr6) &&
+			    !IN6_IS_ADDR_UNSPECIFIED(&addr6) &&
+			    !IN6_IS_ADDR_LOOPBACK(&addr6) &&
+			    !IN6_IS_ADDR_V4MAPPED(&addr6) &&
+			    !IN6_IS_ADDR_V4COMPAT(&addr6) &&
+			    !IN6_IS_ADDR_MULTICAST(&addr6)) {
+				unsigned int i = c->ip6.addr_count;
+
+				if (i >= IP6_MAX_ADDRS)
+					die("Too many IPv6 addresses");
+
+				c->ip6.addrs[i].addr = addr6;
+				c->ip6.addrs[i].prefix_len = 64;
+				c->ip6.addrs[i].permanent = true;
+				c->ip6.addr_count++;
 				if (c->mode == MODE_PASTA)
 					c->ip6.no_copy_addrs = true;
 				break;
 			}
 
-			if (inet_pton(AF_INET, optarg, &c->ip4.addrs[0].addr) &&
-			    !IN4_IS_ADDR_UNSPECIFIED(&c->ip4.addrs[0].addr) &&
-			    !IN4_IS_ADDR_BROADCAST(&c->ip4.addrs[0].addr) &&
-			    !IN4_IS_ADDR_LOOPBACK(&c->ip4.addrs[0].addr) &&
-			    !IN4_IS_ADDR_MULTICAST(&c->ip4.addrs[0].addr)) {
+			if (inet_pton(AF_INET, optarg, &addr4) &&
+			    !IN4_IS_ADDR_UNSPECIFIED(&addr4) &&
+			    !IN4_IS_ADDR_BROADCAST(&addr4) &&
+			    !IN4_IS_ADDR_LOOPBACK(&addr4) &&
+			    !IN4_IS_ADDR_MULTICAST(&addr4)) {
+				unsigned int i = c->ip4.addr_count;
+
+				if (i >= IP4_MAX_ADDRS)
+					die("Too many IPv4 addresses");
+
+				c->ip4.addrs[i].addr = addr4;
+				c->ip4.addrs[i].prefix_len =
+					ip4_default_prefix_len(&addr4);
+				c->ip4.addrs[i].permanent = true;
+				c->ip4.addr_count++;
 				if (c->mode == MODE_PASTA)
 					c->ip4.no_copy_addrs = true;
 				break;
@@ -2135,7 +2161,7 @@ void conf(struct ctx *c, int argc, char **argv)
 	if (!c->ifi6) {
 		c->no_ndp = 1;
 		c->no_dhcpv6 = 1;
-	} else if (IN6_IS_ADDR_UNSPECIFIED(&c->ip6.addrs[0].addr)) {
+	} else if (!c->ip6.addr_count) {
 		c->no_dhcpv6 = 1;
 	}
 
diff --git a/pasta.c b/pasta.c
index 49b393c..fe2908f 100644
--- a/pasta.c
+++ b/pasta.c
@@ -329,10 +329,16 @@ void pasta_ns_conf(struct ctx *c)
 
 		if (c->ifi4) {
 			if (c->ip4.no_copy_addrs) {
-				rc = nl_addr_set(nl_sock_ns, c->pasta_ifi,
-						 AF_INET,
-						 &c->ip4.addrs[0].addr,
-						 c->ip4.addrs[0].prefix_len);
+				int i;
+
+				for (i = 0; i < c->ip4.addr_count; i++) {
+					rc = nl_addr_set(nl_sock_ns,
+							 c->pasta_ifi, AF_INET,
+							 &c->ip4.addrs[i].addr,
+							 c->ip4.addrs[i].prefix_len);
+					if (rc < 0)
+						break;
+				}
 			} else {
 				rc = nl_addr_dup(nl_sock, c->ifi4,
 						 nl_sock_ns, c->pasta_ifi,
@@ -378,12 +384,18 @@ void pasta_ns_conf(struct ctx *c)
 					  0, IFF_NOARP);
 
 			if (c->ip6.no_copy_addrs) {
-				struct in6_addr *a = &c->ip6.addrs[0].addr;
+				struct in6_addr *a;
+				int i;
 
-				if (!IN6_IS_ADDR_UNSPECIFIED(a)) {
+				for (i = 0; i < c->ip6.addr_count; i++) {
+					a = &c->ip6.addrs[i].addr;
+					if (IN6_IS_ADDR_UNSPECIFIED(a))
+						continue;
 					rc = nl_addr_set(nl_sock_ns,
 							 c->pasta_ifi,
 							 AF_INET6, a, 64);
+					if (rc < 0)
+						break;
 				}
 			} else {
 				rc = nl_addr_dup(nl_sock, c->ifi6,
-- 
2.51.1


^ permalink raw reply	[flat|nested] 25+ messages in thread

* [RFC  04/12] conf: Apply -n/--netmask to most recently added address
  2025-12-15  1:54 [RFC 00/12] Support for multiple address and late binding Jon Maloy
                   ` (2 preceding siblings ...)
  2025-12-15  1:54 ` [RFC 03/12] conf: Allow multiple -a/--address options per address family Jon Maloy
@ 2025-12-15  1:54 ` Jon Maloy
  2025-12-15  9:54   ` David Gibson
  2025-12-15  1:54 ` [RFC 05/12] fwd: Check all configured addresses in guest accessibility functions Jon Maloy
                   ` (7 subsequent siblings)
  11 siblings, 1 reply; 25+ messages in thread
From: Jon Maloy @ 2025-12-15  1:54 UTC (permalink / raw)
  To: sbrivio, dgibson, david, jmaloy, passt-dev

We allow for multiple -n/--netmask options, and update the handling so
that each given prefix length is applied to the most recently added
address instead of always to addrs[0].

This allows per-address prefix configuration, such as:

    -a 10.0.0.1 -n 24 -a 10.0.0.2 -n 16

If no address has been added yet, -n still applies to addrs[0] for
backwards compatibility.

Signed-off-by: Jon Maloy <jmaloy@redhat.com>
---
 conf.c | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/conf.c b/conf.c
index e9f217b..0a4a28a 100644
--- a/conf.c
+++ b/conf.c
@@ -1528,6 +1528,7 @@ void conf(struct ctx *c, int argc, char **argv)
 	size_t logsize = 0;
 	char *runas = NULL;
 	long fd_tap_opt;
+	int prefix, idx;
 	int name, ret;
 	uid_t uid;
 	gid_t gid;
@@ -1872,10 +1873,14 @@ void conf(struct ctx *c, int argc, char **argv)
 			die("Invalid address: %s", optarg);
 			break;
 		case 'n':
-			c->ip4.addrs[0].prefix_len = conf_ip4_prefix(optarg);
-			if (c->ip4.addrs[0].prefix_len < 0)
+			prefix = conf_ip4_prefix(optarg);
+
+			if (prefix < 0)
 				die("Invalid netmask: %s", optarg);
 
+			/* Apply to most recent address, or addrs[0] if none yet */
+			idx = c->ip4.addr_count ? c->ip4.addr_count - 1 : 0;
+			c->ip4.addrs[idx].prefix_len = prefix;
 			break;
 		case 'M':
 			parse_mac(c->our_tap_mac, optarg);
-- 
2.51.1


^ permalink raw reply	[flat|nested] 25+ messages in thread

* [RFC  05/12] fwd: Check all configured addresses in guest accessibility functions
  2025-12-15  1:54 [RFC 00/12] Support for multiple address and late binding Jon Maloy
                   ` (3 preceding siblings ...)
  2025-12-15  1:54 ` [RFC 04/12] conf: Apply -n/--netmask to most recently added address Jon Maloy
@ 2025-12-15  1:54 ` Jon Maloy
  2025-12-15 10:06   ` David Gibson
  2025-12-15  1:54 ` [RFC 06/12] arp: Check all configured addresses in ARP filtering Jon Maloy
                   ` (6 subsequent siblings)
  11 siblings, 1 reply; 25+ messages in thread
From: Jon Maloy @ 2025-12-15  1:54 UTC (permalink / raw)
  To: sbrivio, dgibson, david, jmaloy, passt-dev

We update fwd_guest_accessible4() and fwd_guest_accessible6() to check
against all addresses in the addrs[] array, not just addrs[0].

This ensures that when multiple addresses are configured via -a options,
traffic using any of them is correctly identified as guest traffic for
NAT and forwarding decisions.

Signed-off-by: Jon Maloy <jmaloy@redhat.com>
---
 fwd.c | 22 ++++++++++++++++------
 1 file changed, 16 insertions(+), 6 deletions(-)

diff --git a/fwd.c b/fwd.c
index 408af30..ece381d 100644
--- a/fwd.c
+++ b/fwd.c
@@ -502,6 +502,8 @@ static bool is_dns_flow(uint8_t proto, const struct flowside *ini)
 static bool fwd_guest_accessible4(const struct ctx *c,
 				    const struct in_addr *addr)
 {
+	int i;
+
 	if (IN4_IS_ADDR_LOOPBACK(addr))
 		return false;
 
@@ -513,11 +515,15 @@ static bool fwd_guest_accessible4(const struct ctx *c,
 	if (IN4_IS_ADDR_UNSPECIFIED(addr))
 		return false;
 
-	/* For IPv4, addr_seen is initialised to addr, so is always a valid
-	 * address
+	/* Check against all configured guest addresses */
+	for (i = 0; i < c->ip4.addr_count; i++)
+		if (IN4_ARE_ADDR_EQUAL(addr, &c->ip4.addrs[i].addr))
+			return false;
+
+	/* Also check addr_seen: it tracks the address the guest is actually
+	 * using, which may differ from configured addresses.
 	 */
-	if (IN4_ARE_ADDR_EQUAL(addr, &c->ip4.addrs[0].addr) ||
-	    IN4_ARE_ADDR_EQUAL(addr, &c->ip4.addr_seen))
+	if (IN4_ARE_ADDR_EQUAL(addr, &c->ip4.addr_seen))
 		return false;
 
 	return true;
@@ -534,11 +540,15 @@ static bool fwd_guest_accessible4(const struct ctx *c,
 static bool fwd_guest_accessible6(const struct ctx *c,
 				  const struct in6_addr *addr)
 {
+	int i;
+
 	if (IN6_IS_ADDR_LOOPBACK(addr))
 		return false;
 
-	if (IN6_ARE_ADDR_EQUAL(addr, &c->ip6.addrs[0].addr))
-		return false;
+	/* Check against all configured guest addresses */
+	for (i = 0; i < c->ip6.addr_count; i++)
+		if (IN6_ARE_ADDR_EQUAL(addr, &c->ip6.addrs[i].addr))
+			return false;
 
 	/* For IPv6, addr_seen starts unspecified, because we don't know what LL
 	 * address the guest will take until we see it.  Only check against it
-- 
2.51.1


^ permalink raw reply	[flat|nested] 25+ messages in thread

* [RFC  06/12] arp: Check all configured addresses in ARP filtering
  2025-12-15  1:54 [RFC 00/12] Support for multiple address and late binding Jon Maloy
                   ` (4 preceding siblings ...)
  2025-12-15  1:54 ` [RFC 05/12] fwd: Check all configured addresses in guest accessibility functions Jon Maloy
@ 2025-12-15  1:54 ` Jon Maloy
  2025-12-15 10:07   ` David Gibson
  2025-12-15  1:54 ` [RFC 07/12] netlink: Subscribe to link/address changes in namespace Jon Maloy
                   ` (5 subsequent siblings)
  11 siblings, 1 reply; 25+ messages in thread
From: Jon Maloy @ 2025-12-15  1:54 UTC (permalink / raw)
  To: sbrivio, dgibson, david, jmaloy, passt-dev

We update ignore_arp() to check against all addresses in the addrs[]
array, not just addrs[0]. This ensures ARP requests for any of the
guest's configured addresses are properly ignored.

Signed-off-by: Jon Maloy <jmaloy@redhat.com>
---
 arp.c | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/arp.c b/arp.c
index 7eaf517..61c309e 100644
--- a/arp.c
+++ b/arp.c
@@ -41,6 +41,8 @@
 static bool ignore_arp(const struct ctx *c,
 		       const struct arphdr *ah, const struct arpmsg *am)
 {
+	int i;
+
 	if (ah->ar_hrd != htons(ARPHRD_ETHER)	||
 	    ah->ar_pro != htons(ETH_P_IP)	||
 	    ah->ar_hln != ETH_ALEN		||
@@ -53,9 +55,10 @@ static bool ignore_arp(const struct ctx *c,
 	    !memcmp(am->sip, am->tip, sizeof(am->sip)))
 		return true;
 
-	/* Don't resolve the guest's assigned address, either. */
-	if (!memcmp(am->tip, &c->ip4.addrs[0].addr, sizeof(am->tip)))
-		return true;
+	/* Don't resolve any of the guest's assigned addresses, either */
+	for (i = 0; i < c->ip4.addr_count; i++)
+		if (!memcmp(am->tip, &c->ip4.addrs[i].addr, sizeof(am->tip)))
+			return true;
 
 	return false;
 }
-- 
2.51.1


^ permalink raw reply	[flat|nested] 25+ messages in thread

* [RFC  07/12] netlink: Subscribe to link/address changes in namespace
  2025-12-15  1:54 [RFC 00/12] Support for multiple address and late binding Jon Maloy
                   ` (5 preceding siblings ...)
  2025-12-15  1:54 ` [RFC 06/12] arp: Check all configured addresses in ARP filtering Jon Maloy
@ 2025-12-15  1:54 ` Jon Maloy
  2025-12-15 10:32   ` David Gibson
  2025-12-15  1:54 ` [RFC 08/12] netlink: Subscribe to route " Jon Maloy
                   ` (4 subsequent siblings)
  11 siblings, 1 reply; 25+ messages in thread
From: Jon Maloy @ 2025-12-15  1:54 UTC (permalink / raw)
  To: sbrivio, dgibson, david, jmaloy, passt-dev

We add subscriptions to RTMGRP_LINK, RTMGRP_IPV4_IFADDR, and
RTMGRP_IPV6_IFADDR, so that we can receive notifications when link
state or addresses change on the namespace interface.

When addresses are discovered via netlink:

- We mark them as non-permanent, which means they can be modified or
  deleted by subsequent events.
- We apply the prefix indicated in the notification.
- We update addr_seen to track the new address as the active one.

This provides the foundation for dynamic address monitoring,
and supports runtime network changes.

Signed-off-by: Jon Maloy <jmaloy@redhat.com>
---
 epoll_type.h |   2 +
 netlink.c    | 370 +++++++++++++++++++++++++++++++++++++++++++++++++++
 netlink.h    |   3 +
 passt.c      |   5 +
 passt.h      |   1 +
 tap.c        |   6 +-
 6 files changed, 384 insertions(+), 3 deletions(-)

diff --git a/epoll_type.h b/epoll_type.h
index a90ffb6..0a16d94 100644
--- a/epoll_type.h
+++ b/epoll_type.h
@@ -46,6 +46,8 @@ enum epoll_type {
 	EPOLL_TYPE_REPAIR,
 	/* Netlink neighbour subscription socket */
 	EPOLL_TYPE_NL_NEIGH,
+	/* Netlink link/address subscription socket */
+	EPOLL_TYPE_NL_LINKADDR,
 
 	EPOLL_NUM_TYPES,
 };
diff --git a/netlink.c b/netlink.c
index 82a2f0c..7492f17 100644
--- a/netlink.c
+++ b/netlink.c
@@ -35,6 +35,9 @@
 #include "passt.h"
 #include "log.h"
 #include "ip.h"
+#include "tap.h"
+#include "arp.h"
+#include "ndp.h"
 #include "netlink.h"
 #include "epoll_ctl.h"
 
@@ -59,6 +62,7 @@
 int nl_sock		 = -1;
 int nl_sock_ns		 = -1;
 static int nl_sock_neigh = -1;
+static int nl_sock_linkaddr = -1;
 static int nl_seq	 = 1;
 
 /**
@@ -91,6 +95,372 @@ static int nl_sock_init_do(void *arg)
 	return 0;
 }
 
+/**
+ * nl_addr4_find() - Find an IPv4 address in the address array
+ * @c:		Execution context
+ * @addr:	Address to find
+ *
+ * Return: index if found, -1 otherwise
+ */
+static int nl_addr4_find(const struct ctx *c, const struct in_addr *addr)
+{
+	int i;
+
+	for (i = 0; i < c->ip4.addr_count; i++)
+		if (IN4_ARE_ADDR_EQUAL(&c->ip4.addrs[i].addr, addr))
+			return (int)i;
+
+	return -1;
+}
+
+/**
+ * nl_addr6_find() - Find an IPv6 address in the address array
+ * @c:		Execution context
+ * @addr:	Address to find
+ *
+ * Return: index if found, -1 otherwise
+ */
+static int nl_addr6_find(const struct ctx *c, const struct in6_addr *addr)
+{
+	int i;
+
+	for (i = 0; i < c->ip6.addr_count; i++)
+		if (IN6_ARE_ADDR_EQUAL(&c->ip6.addrs[i].addr, addr))
+			return (int)i;
+
+	return -1;
+}
+
+/**
+ * nl_addr4_add() - Add a discovered IPv4 address to the address array
+ * @c:		Execution context
+ * @addr:	Address to add
+ * @prefix_len: Prefix length
+ *
+ * Return: true if added or updated, false if array full or already permanent
+ */
+static bool nl_addr4_add(struct ctx *c, const struct in_addr *addr,
+			 int prefix_len)
+{
+	int idx = nl_addr4_find(c, addr);
+
+	if (idx >= 0) {
+		/* Address exists - if permanent, don't touch; else update */
+		if (c->ip4.addrs[idx].permanent)
+			return false;
+		c->ip4.addrs[idx].prefix_len = prefix_len;
+		return true;
+	}
+
+	/* New address - add if room */
+	if (c->ip4.addr_count >= IP4_MAX_ADDRS) {
+		debug("IPv4 address array full, ignoring discovered address");
+		return false;
+	}
+
+	idx = c->ip4.addr_count++;
+	c->ip4.addrs[idx].addr = *addr;
+	c->ip4.addrs[idx].prefix_len = prefix_len;
+	c->ip4.addrs[idx].permanent = 0;
+	return true;
+}
+
+/**
+ * nl_addr6_add() - Add a discovered IPv6 address to the address array
+ * @c:		Execution context
+ * @addr:	Address to add
+ * @prefix_len: Prefix length
+ *
+ * Return: true if added or updated, false if array full or already permanent
+ */
+static bool nl_addr6_add(struct ctx *c, const struct in6_addr *addr,
+			 int prefix_len)
+{
+	int idx = nl_addr6_find(c, addr);
+
+	if (idx >= 0) {
+		/* Address exists - if permanent, don't touch; else update */
+		if (c->ip6.addrs[idx].permanent)
+			return false;
+		c->ip6.addrs[idx].prefix_len = prefix_len;
+		return true;
+	}
+
+	/* New address - add if room */
+	if (c->ip6.addr_count >= IP6_MAX_ADDRS) {
+		debug("IPv6 address array full, ignoring discovered address");
+		return false;
+	}
+
+	idx = c->ip6.addr_count++;
+	c->ip6.addrs[idx].addr = *addr;
+	c->ip6.addrs[idx].prefix_len = prefix_len;
+	c->ip6.addrs[idx].permanent = 0;
+	return true;
+}
+
+/**
+ * nl_addr4_del() - Remove an IPv4 address from the array if not permanent
+ * @c:		Execution context
+ * @addr:	Address to remove
+ *
+ * Return: true if removed, false if not found or permanent
+ */
+static bool nl_addr4_del(struct ctx *c, const struct in_addr *addr)
+{
+	int i, idx = nl_addr4_find(c, addr);
+
+	if (idx < 0)
+		return false;
+
+	if (c->ip4.addrs[idx].permanent)
+		return false;
+
+	/* Shift remaining entries down */
+	c->ip4.addr_count--;
+	for (i = idx; i < c->ip4.addr_count; i++)
+		c->ip4.addrs[i] = c->ip4.addrs[i + 1];
+
+	return true;
+}
+
+/**
+ * nl_addr6_del() - Remove an IPv6 address from the array if not permanent
+ * @c:		Execution context
+ * @addr:	Address to remove
+ *
+ * Return: true if removed, false if not found or permanent
+ */
+static bool nl_addr6_del(struct ctx *c, const struct in6_addr *addr)
+{
+	int i, idx = nl_addr6_find(c, addr);
+
+	if (idx < 0)
+		return false;
+
+	if (c->ip6.addrs[idx].permanent)
+		return false;
+
+	/* Shift remaining entries down */
+	c->ip6.addr_count--;
+	for (i = idx; i < c->ip6.addr_count; i++)
+		c->ip6.addrs[i] = c->ip6.addrs[i + 1];
+
+	return true;
+}
+
+/**
+ * nl_linkaddr_msg_read() - Parse and log a netlink link/addr message
+ * @c:		Execution context
+ * @nh:	Netlink message header
+ */
+static void nl_linkaddr_msg_read(struct ctx *c, const struct nlmsghdr *nh)
+{
+	if (nh->nlmsg_type == NLMSG_DONE || nh->nlmsg_type == NLMSG_ERROR)
+		return;
+
+	if (nh->nlmsg_type == RTM_NEWLINK || nh->nlmsg_type == RTM_DELLINK) {
+		const struct ifinfomsg *ifm = NLMSG_DATA(nh);
+		struct rtattr *rta = IFLA_RTA(ifm);
+		size_t na = IFLA_PAYLOAD(nh);
+		const char *name = "?";
+		bool up = !!(ifm->ifi_flags & IFF_UP);
+		bool running = !!(ifm->ifi_flags & IFF_RUNNING);
+
+		for (; RTA_OK(rta, na); rta = RTA_NEXT(rta, na)) {
+			if (rta->rta_type == IFLA_IFNAME) {
+				name = (const char *)RTA_DATA(rta);
+				break;
+			}
+		}
+
+		/* Update pasta interface UP state if this is our interface */
+		if (c->mode == MODE_PASTA &&
+		    (unsigned int)ifm->ifi_index == c->pasta_ifi) {
+			c->pasta_ifi_up = up;
+			debug("Interface %s", up ? "UP" : "DOWN");
+		}
+
+		if (nh->nlmsg_type == RTM_NEWLINK)
+			debug("Link %s (idx=%d): %s %s", name, ifm->ifi_index,
+			     up ? "UP" : "DOWN", running ? "RUNNING" : "");
+		else
+			debug("Link %s (idx=%d): DELETED", name, ifm->ifi_index);
+
+		return;
+	}
+
+	if (nh->nlmsg_type == RTM_NEWADDR || nh->nlmsg_type == RTM_DELADDR) {
+		bool is_new = (nh->nlmsg_type == RTM_NEWADDR);
+		const struct ifaddrmsg *ifa = NLMSG_DATA(nh);
+		char addr_str[INET6_ADDRSTRLEN];
+		struct rtattr *rta = IFA_RTA(ifa);
+		char ifname[IFNAMSIZ] = { 0 };
+		size_t na = IFA_PAYLOAD(nh);
+		void *addr = NULL;
+
+		for (; RTA_OK(rta, na); rta = RTA_NEXT(rta, na)) {
+			if (ifa->ifa_family == AF_INET &&
+			    rta->rta_type == IFA_LOCAL) {
+				addr = RTA_DATA(rta);
+				break;
+			} else if (ifa->ifa_family == AF_INET6 &&
+				   rta->rta_type == IFA_ADDRESS) {
+				addr = RTA_DATA(rta);
+				break;
+			}
+		}
+
+		if (!addr)
+			return;
+
+		if_indextoname(ifa->ifa_index, ifname);
+		inet_ntop(ifa->ifa_family, addr, addr_str, sizeof(addr_str));
+
+		debug("%s addr on %s (index=%d): %s/%i%s",
+		      is_new ? "NEW" : "DEL", ifname, ifa->ifa_index, addr_str,
+		      ifa->ifa_prefixlen,
+		      tap_is_ready(c) ? " (tap UP)" : " (tap DOWN)");
+
+		/* Only handle our pasta interface */
+		if (c->mode != MODE_PASTA || ifa->ifa_index != c->pasta_ifi)
+			return;
+
+		if (ifa->ifa_family == AF_INET) {
+			struct in_addr *a = (struct in_addr *)addr;
+
+			if (!is_new) {
+				nl_addr4_del(c, a);
+				return;
+			}
+
+			if (nl_addr4_add(c, a, ifa->ifa_prefixlen)) {
+				c->ip4.addr_seen = *a;
+				if (c->pasta_ifi_up && c->ifi4) {
+					debug("Sending ARP");
+					arp_send_init_req(c);
+				}
+			}
+		} else if (ifa->ifa_family == AF_INET6) {
+			struct in6_addr *a = (struct in6_addr *)addr;
+
+			if (!is_new) {
+				nl_addr6_del(c, a);
+				return;
+			}
+
+			if (nl_addr6_add(c, a,
+					 ifa->ifa_prefixlen)) {
+				c->ip6.addr_seen = *a;
+				if (c->pasta_ifi_up &&
+				    c->ifi6 && !c->no_ndp) {
+					debug("Sending NDP");
+					ndp_send_init_req(c);
+				}
+			}
+		}
+	}
+}
+
+/**
+ * nl_linkaddr_notify_handler() - Handle events from link/addr notifier socket
+ * @c:		Execution context
+ */
+void nl_linkaddr_notify_handler(struct ctx *c)
+{
+	char buf[NLBUFSIZ];
+
+	for (;;) {
+		ssize_t n = recv(nl_sock_linkaddr, buf, sizeof(buf), MSG_DONTWAIT);
+		struct nlmsghdr *nh = (struct nlmsghdr *)buf;
+
+		if (n < 0) {
+			if (errno == EINTR)
+				continue;
+			if (errno != EAGAIN)
+				debug("recv() error: %s", strerror_(errno));
+			break;
+		}
+
+		debug("Received %zd bytes", n);
+
+		for (; NLMSG_OK(nh, n); nh = NLMSG_NEXT(nh, n))
+			nl_linkaddr_msg_read(c, nh);
+	}
+}
+
+/**
+ * nl_linkaddr_init_do() - Actually create and bind the netlink socket
+ * @arg:	Execution context (for namespace entry) or NULL
+ *
+ * Return: 0 on success, -1 on failure
+ */
+static int nl_linkaddr_init_do(void *arg)
+{
+	struct sockaddr_nl addr = { .nl_family = AF_NETLINK,
+		.nl_groups = RTMGRP_LINK | RTMGRP_IPV4_IFADDR |
+			     RTMGRP_IPV6_IFADDR };
+
+	if (arg)
+		ns_enter((struct ctx *)arg);
+
+	nl_sock_linkaddr = socket(AF_NETLINK, SOCK_RAW | SOCK_CLOEXEC, NETLINK_ROUTE);
+	if (nl_sock_linkaddr < 0) {
+		debug("socket() failed: %s", strerror_(errno));
+		return -1;
+	}
+
+	if (bind(nl_sock_linkaddr, (struct sockaddr *)&addr, sizeof(addr)) < 0) {
+		debug("bind() failed: %s", strerror_(errno));
+		close(nl_sock_linkaddr);
+		nl_sock_linkaddr = -1;
+		return -1;
+	}
+
+	debug("socket fd=%d", nl_sock_linkaddr);
+	return 0;
+}
+
+/**
+ * nl_linkaddr_notify_init() - Initialize link/address change notifier
+ * @c:		Execution context
+ *
+ * Return: 0 on success, -1 on failure
+ */
+int nl_linkaddr_notify_init(const struct ctx *c)
+{
+	union epoll_ref ref = { .type = EPOLL_TYPE_NL_LINKADDR };
+	struct epoll_event ev = { .events = EPOLLIN };
+
+	if (nl_sock_linkaddr >= 0) {
+		debug("notifier already initialized (fd=%d)", nl_sock_linkaddr);
+		return 0;
+	}
+
+	/* Open the notifier socket in the namespace for pasta mode,
+	 * or in the init namespace otherwise.
+	 */
+	if (c->mode == MODE_PASTA)
+		NS_CALL(nl_linkaddr_init_do, (void *)c);
+	else
+		nl_linkaddr_init_do(NULL);
+
+	if (nl_sock_linkaddr < 0) {
+		warn("Failed to create/bind link/addr notifier socket");
+		return -1;
+	}
+
+	ev.data.u64 = ref.u64;
+	if (epoll_ctl(c->epollfd, EPOLL_CTL_ADD, nl_sock_linkaddr, &ev) == -1) {
+		warn("epoll_ctl() failed on link/addr notifier socket: %s",
+		     strerror_(errno));
+		close(nl_sock_linkaddr);
+		nl_sock_linkaddr = -1;
+		return -1;
+	}
+
+	return 0;
+}
 /**
  * nl_sock_init() - Call nl_sock_init_do(), won't return on failure
  * @c:		Execution context
diff --git a/netlink.h b/netlink.h
index 8f1e9b9..1796a72 100644
--- a/netlink.h
+++ b/netlink.h
@@ -33,4 +33,7 @@ int nl_link_set_flags(int s, unsigned int ifi,
 int nl_neigh_notify_init(const struct ctx *c);
 void nl_neigh_notify_handler(const struct ctx *c);
 
+int nl_linkaddr_notify_init(const struct ctx *c);
+void nl_linkaddr_notify_handler(struct ctx *c);
+
 #endif /* NETLINK_H */
diff --git a/passt.c b/passt.c
index 5ed88d0..f274858 100644
--- a/passt.c
+++ b/passt.c
@@ -80,6 +80,7 @@ char *epoll_type_str[] = {
 	[EPOLL_TYPE_REPAIR_LISTEN]	= "TCP_REPAIR helper listening socket",
 	[EPOLL_TYPE_REPAIR]		= "TCP_REPAIR helper socket",
 	[EPOLL_TYPE_NL_NEIGH]		= "netlink neighbour notifier socket",
+	[EPOLL_TYPE_NL_LINKADDR]	= "netlink link/address notifier socket",
 };
 static_assert(ARRAY_SIZE(epoll_type_str) == EPOLL_NUM_TYPES,
 	      "epoll_type_str[] doesn't match enum epoll_type");
@@ -304,6 +305,9 @@ static void passt_worker(void *opaque, int nfds, struct epoll_event *events)
 		case EPOLL_TYPE_NL_NEIGH:
 			nl_neigh_notify_handler(c);
 			break;
+		case EPOLL_TYPE_NL_LINKADDR:
+			nl_linkaddr_notify_handler(c);
+			break;
 		default:
 			/* Can't happen */
 			ASSERT(0);
@@ -413,6 +417,7 @@ int main(int argc, char **argv)
 
 	fwd_neigh_table_init(&c);
 	nl_neigh_notify_init(&c);
+	nl_linkaddr_notify_init(&c);
 
 	if (!c.foreground) {
 		if ((devnull_fd = open("/dev/null", O_RDWR | O_CLOEXEC)) < 0)
diff --git a/passt.h b/passt.h
index 533f2cb..70ccaf1 100644
--- a/passt.h
+++ b/passt.h
@@ -264,6 +264,7 @@ struct ctx {
 	char pasta_ifn[IF_NAMESIZE];
 	unsigned int pasta_ifi;
 	int pasta_conf_ns;
+	int pasta_ifi_up;	/* Namespace interface is UP */
 
 	int no_tcp;
 	struct tcp_ctx tcp;
diff --git a/tap.c b/tap.c
index 0b96cc1..a2a4459 100644
--- a/tap.c
+++ b/tap.c
@@ -1363,10 +1363,10 @@ bool tap_is_ready(const struct ctx *c)
 		return false;
 
 	if (c->mode == MODE_PASTA) {
-		/* If pasta_conf_ns is set, the interface was configured and
-		 * brought up during initialization. If not, it's still down.
+		/* Check if the namespace interface is actually UP.
+		 * This is tracked by netlink link notifications.
 		 */
-		return c->pasta_conf_ns;
+		return c->pasta_ifi_up;
 	}
 
 	return true;
-- 
2.51.1


^ permalink raw reply	[flat|nested] 25+ messages in thread

* [RFC  08/12] netlink: Subscribe to route changes in namespace
  2025-12-15  1:54 [RFC 00/12] Support for multiple address and late binding Jon Maloy
                   ` (6 preceding siblings ...)
  2025-12-15  1:54 ` [RFC 07/12] netlink: Subscribe to link/address changes in namespace Jon Maloy
@ 2025-12-15  1:54 ` Jon Maloy
  2025-12-15 10:38   ` David Gibson
  2025-12-15  1:54 ` [RFC 09/12] netlink: Add host-side monitoring for late template interface binding Jon Maloy
                   ` (3 subsequent siblings)
  11 siblings, 1 reply; 25+ messages in thread
From: Jon Maloy @ 2025-12-15  1:54 UTC (permalink / raw)
  To: sbrivio, dgibson, david, jmaloy, passt-dev

We add subscriptions to RTMGRP_IPV4_ROUTE and RTMGRP_IPV6_ROUTE, so
that we receive notifications when routes change on the namespace
interface.

When default routes change on the pasta interface, we update guest_gw
(and our_tap_addr for IPv4) to reflect the new gateway. This handles
both routes propagated from the host and routes configured manually
by the user inside the namespace.

Signed-off-by: Jon Maloy <jmaloy@redhat.com>
---
 netlink.c | 49 ++++++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 46 insertions(+), 3 deletions(-)

diff --git a/netlink.c b/netlink.c
index 7492f17..a8d3116 100644
--- a/netlink.c
+++ b/netlink.c
@@ -195,7 +195,7 @@ static bool nl_addr6_add(struct ctx *c, const struct in6_addr *addr,
 	idx = c->ip6.addr_count++;
 	c->ip6.addrs[idx].addr = *addr;
 	c->ip6.addrs[idx].prefix_len = prefix_len;
-	c->ip6.addrs[idx].permanent = 0;
+	c->ip6.addrs[idx].permanent = 0;
 	return true;
 }
 
@@ -359,6 +359,49 @@ static void nl_linkaddr_msg_read(struct ctx *c, const struct nlmsghdr *nh)
 				}
 			}
 		}
+		return;
+	}
+
+	if (nh->nlmsg_type == RTM_NEWROUTE || nh->nlmsg_type == RTM_DELROUTE) {
+		bool is_new = (nh->nlmsg_type == RTM_NEWROUTE);
+		const struct rtmsg *rtm = NLMSG_DATA(nh);
+		struct rtattr *rta = RTM_RTA(rtm);
+		size_t na = RTM_PAYLOAD(nh);
+		unsigned int oif = 0;
+		void *gw = NULL;
+
+		/* Only interested in default routes (dst_len == 0) */
+		if (rtm->rtm_dst_len != 0)
+			return;
+
+		for (; RTA_OK(rta, na); rta = RTA_NEXT(rta, na)) {
+			if (rta->rta_type == RTA_GATEWAY)
+				gw = RTA_DATA(rta);
+			else if (rta->rta_type == RTA_OIF)
+				oif = *(unsigned int *)RTA_DATA(rta);
+		}
+
+		if (!gw)
+			return;
+
+		/* Only handle our pasta interface */
+		if (c->mode != MODE_PASTA || oif != c->pasta_ifi)
+			return;
+
+		if (rtm->rtm_family == AF_INET) {
+			if (is_new) {
+				c->ip4.guest_gw = *(struct in_addr *)gw;
+				c->ip4.our_tap_addr = c->ip4.guest_gw;
+			} else {
+				c->ip4.guest_gw = (struct in_addr){ 0 };
+				c->ip4.our_tap_addr = (struct in_addr){ 0 };
+			}
+		} else if (rtm->rtm_family == AF_INET6) {
+			if (is_new)
+				c->ip6.guest_gw = *(struct in6_addr *)gw;
+			else
+				c->ip6.guest_gw = (struct in6_addr){ 0 };
+		}
 	}
 }
 
@@ -398,8 +441,8 @@ void nl_linkaddr_notify_handler(struct ctx *c)
 static int nl_linkaddr_init_do(void *arg)
 {
 	struct sockaddr_nl addr = { .nl_family = AF_NETLINK,
-		.nl_groups = RTMGRP_LINK | RTMGRP_IPV4_IFADDR |
-			     RTMGRP_IPV6_IFADDR };
+		.nl_groups = RTMGRP_LINK | RTMGRP_IPV4_IFADDR | RTMGRP_IPV6_IFADDR |
+			     RTMGRP_IPV4_ROUTE | RTMGRP_IPV6_ROUTE };
 
 	if (arg)
 		ns_enter((struct ctx *)arg);
-- 
2.51.1


^ permalink raw reply	[flat|nested] 25+ messages in thread

* [RFC  09/12] netlink: Add host-side monitoring for late template interface binding
  2025-12-15  1:54 [RFC 00/12] Support for multiple address and late binding Jon Maloy
                   ` (7 preceding siblings ...)
  2025-12-15  1:54 ` [RFC 08/12] netlink: Subscribe to route " Jon Maloy
@ 2025-12-15  1:54 ` Jon Maloy
  2025-12-15  1:54 ` [RFC 10/12] netlink: Add host-side route monitoring and propagation Jon Maloy
                   ` (2 subsequent siblings)
  11 siblings, 0 replies; 25+ messages in thread
From: Jon Maloy @ 2025-12-15  1:54 UTC (permalink / raw)
  To: sbrivio, dgibson, david, jmaloy, passt-dev

When pasta starts without an active template interface (e.g., WiFi
not yet connected), it falls back to local mode. This change adds
support for late binding: when the template interface gets an address
later, pasta detects this via a host-side netlink socket and
propagates the configuration to the namespace.

Late binding occurs when:
- A specific interface is given via -I and it later gets an address
- No interface is specified, and any interface gets an address.
  In the latter case the first discovered interface is adopted as
  template.

The key changes we make in this commit are:
- We add a host-side netlink socket (nl_sock_linkaddr_host) to
  monitor link and address changes on the template interface.
- We add a nl_linkaddr_host_handler() to process these events
  and propagate addresses to the namespace.
- We add support for late binding: when ifi4/ifi6 are unset, we
  adopt the interface that receives an address - either the one
  specified via -I, or the first one to get an address if -I was
  not given.
- We bring the interface UP after the first address is added via late
  binding.
- We retain CAP_NET_ADMIN in isolate_prefork() for pasta mode to allow
  dynamic interface configuration after sandboxing.

Signed-off-by: Jon Maloy <jmaloy@redhat.com>
---
 epoll_type.h |   4 +-
 isolation.c  |   4 +
 netlink.c    | 314 +++++++++++++++++++++++++++++++++++++++++++++++++++
 netlink.h    |   3 +
 passt.c      |   4 +
 5 files changed, 328 insertions(+), 1 deletion(-)

diff --git a/epoll_type.h b/epoll_type.h
index 0a16d94..8dc6b8a 100644
--- a/epoll_type.h
+++ b/epoll_type.h
@@ -46,8 +46,10 @@ enum epoll_type {
 	EPOLL_TYPE_REPAIR,
 	/* Netlink neighbour subscription socket */
 	EPOLL_TYPE_NL_NEIGH,
-	/* Netlink link/address subscription socket */
+	/* Netlink link/address subscription socket (namespace) */
 	EPOLL_TYPE_NL_LINKADDR,
+	/* Netlink link/address subscription socket (host, for template) */
+	EPOLL_TYPE_NL_LINKADDR_HOST,
 
 	EPOLL_NUM_TYPES,
 };
diff --git a/isolation.c b/isolation.c
index b25f349..633c396 100644
--- a/isolation.c
+++ b/isolation.c
@@ -356,6 +356,10 @@ int isolate_prefork(const struct ctx *c)
 	if (c->mode == MODE_PASTA) {
 		/* Keep CAP_SYS_ADMIN, so we can enter the netns */
 		ns_caps |= BIT(CAP_SYS_ADMIN);
+		/* Keep CAP_NET_ADMIN for dynamic interface configuration
+		 * (late binding when template interface comes up after start)
+		 */
+		ns_caps |= BIT(CAP_NET_ADMIN);
 		/* Keep CAP_NET_BIND_SERVICE, so we can splice
 		 * outbound connections to low port numbers
 		 */
diff --git a/netlink.c b/netlink.c
index a8d3116..583ada8 100644
--- a/netlink.c
+++ b/netlink.c
@@ -41,6 +41,9 @@
 #include "netlink.h"
 #include "epoll_ctl.h"
 
+/* Default namespace interface name from conf.c */
+extern const char *pasta_default_ifn;
+
 /* Same as RTA_NEXT() but for nexthops: RTNH_NEXT() doesn't take 'attrlen' */
 #define RTNH_NEXT_AND_DEC(rtnh, attrlen)				\
 	((attrlen) -= RTNH_ALIGN((rtnh)->rtnh_len), RTNH_NEXT(rtnh))
@@ -63,6 +66,7 @@ int nl_sock		 = -1;
 int nl_sock_ns		 = -1;
 static int nl_sock_neigh = -1;
 static int nl_sock_linkaddr = -1;
+static int nl_sock_linkaddr_host = -1;
 static int nl_seq	 = 1;
 
 /**
@@ -249,6 +253,175 @@ static bool nl_addr6_del(struct ctx *c, const struct in6_addr *addr)
 	return true;
 }
 
+/**
+ * nl_linkaddr_host_msg_read() - Handle host-side link/addr changes
+ * @c:		Execution context
+ * @nh:	Netlink message header
+ *
+ * Monitor template interface changes and propagate to namespace.
+ * Supports late binding: if no template was detected at startup,
+ * adopt the interface specified by -I when it gets an address.
+ */
+static void nl_linkaddr_host_msg_read(struct ctx *c, const struct nlmsghdr *nh)
+{
+	if (nh->nlmsg_type == NLMSG_DONE || nh->nlmsg_type == NLMSG_ERROR)
+		return;
+
+	if (nh->nlmsg_type == RTM_NEWADDR || nh->nlmsg_type == RTM_DELADDR) {
+		bool is_new = (nh->nlmsg_type == RTM_NEWADDR);
+		const struct ifaddrmsg *ifa = NLMSG_DATA(nh);
+		struct rtattr *rta = IFA_RTA(ifa);
+		size_t na = IFA_PAYLOAD(nh);
+		bool late_binding = false;
+		unsigned int template_ifi;
+		char ifname[IFNAMSIZ];
+		void *addr = NULL;
+		bool is_default;
+		bool is_match;
+		bool unbound;
+
+		/* Get interface name for this message */
+		if (!if_indextoname(ifa->ifa_index, ifname))
+			snprintf(ifname, sizeof(ifname), "?");
+
+		/* Get template interface index, handling late binding.
+		 * Late binding occurs when ifi4/ifi6 <= 0 (local mode) and either:
+		 * - pasta_ifn is set and matches this interface, or
+		 * - pasta_ifn contains the default name
+		 */
+		if (ifa->ifa_family == AF_INET)
+			template_ifi = c->ifi4;
+		else if (ifa->ifa_family == AF_INET6)
+			template_ifi = c->ifi6;
+		else
+			return;
+
+		/* Check for late binding conditions */
+		is_default = !strcmp(c->pasta_ifn, pasta_default_ifn);
+		is_match = !strcmp(ifname, c->pasta_ifn);
+		unbound = (ifa->ifa_family == AF_INET) ?
+			  (int)c->ifi4 <= 0 : (int)c->ifi6 <= 0;
+
+		if (unbound && (is_default || is_match)) {
+			debug("Late binding: using %s as %s template", ifname,
+			      ifa->ifa_family == AF_INET ? "IPv4" : "IPv6");
+
+			if (ifa->ifa_family == AF_INET) {
+				c->ifi4 = ifa->ifa_index;
+				template_ifi = c->ifi4;
+			} else {
+				c->ifi6 = ifa->ifa_index;
+				template_ifi = c->ifi6;
+			}
+			late_binding = true;
+
+			if (is_default)
+				snprintf(c->pasta_ifn, sizeof(c->pasta_ifn),
+					 "%s", ifname);
+		}
+
+		if (ifa->ifa_index != template_ifi)
+			return;
+
+		/* Re-initialize rta/na for attribute parsing */
+		rta = IFA_RTA(ifa);
+		na = IFA_PAYLOAD(nh);
+
+		for (; RTA_OK(rta, na); rta = RTA_NEXT(rta, na)) {
+			if (ifa->ifa_family == AF_INET &&
+			    rta->rta_type == IFA_LOCAL) {
+				addr = RTA_DATA(rta);
+				break;
+			} else if (ifa->ifa_family == AF_INET6 &&
+				   rta->rta_type == IFA_ADDRESS) {
+				addr = RTA_DATA(rta);
+				break;
+			}
+		}
+
+		if (!addr) {
+			info("No addr found in netlink linkaddr message");
+			return;
+		}
+
+		if (ifa->ifa_family == AF_INET) {
+			struct in_addr *a = (struct in_addr *)addr;
+			char buf[INET_ADDRSTRLEN];
+			int rc;
+
+			inet_ntop(AF_INET, a, buf, sizeof(buf));
+
+			if (!is_new) {
+				nl_addr4_del(c, a);
+				nl_addr_del(nl_sock_ns, c->pasta_ifi,
+					    AF_INET, a, ifa->ifa_prefixlen);
+				return;
+			}
+			rc = nl_addr_set(nl_sock_ns, c->pasta_ifi,
+					 AF_INET, a,
+					 ifa->ifa_prefixlen);
+			if (rc < 0) {
+				debug("Failed to add %s/%u to ns: %s", buf,
+				      ifa->ifa_prefixlen, strerror_(-rc));
+			} else {
+				nl_addr4_add(c, a, ifa->ifa_prefixlen);
+				c->ip4.addr_seen = *a;
+				debug("Added %s/%u to namespace",
+				      buf, ifa->ifa_prefixlen);
+
+				/* Bring interface UP on late binding */
+				if (late_binding && !c->pasta_ifi_up) {
+					nl_link_set_flags(nl_sock_ns,
+							  c->pasta_ifi,
+							  IFF_UP, IFF_UP);
+					c->pasta_ifi_up = 1;
+					debug("Brought interface up");
+				}
+				if (late_binding || c->pasta_ifi_up)
+					arp_send_init_req(c);
+			}
+		} else if (ifa->ifa_family == AF_INET6) {
+			struct in6_addr *a = (struct in6_addr *)addr;
+			char buf[INET6_ADDRSTRLEN];
+			int rc;
+
+			inet_ntop(AF_INET6, a, buf, sizeof(buf));
+
+			if (!is_new) {
+				nl_addr6_del(c, a);
+				nl_addr_del(nl_sock_ns, c->pasta_ifi,
+					    AF_INET6, a, ifa->ifa_prefixlen);
+				return;
+			}
+			rc = nl_addr_set(nl_sock_ns, c->pasta_ifi,
+					 AF_INET6, a, ifa->ifa_prefixlen);
+			if (rc < 0) {
+				debug("Failed to add %s/%u to ns: %s",
+				      buf, ifa->ifa_prefixlen,
+				      strerror_(-rc));
+			} else {
+				nl_addr6_add(c, a, ifa->ifa_prefixlen);
+				c->ip6.addr_seen = *a;
+				debug("Added %s/%u to namespace",
+				      buf, ifa->ifa_prefixlen);
+
+				/* Bring interface UP on late binding */
+				if (late_binding && !c->pasta_ifi_up) {
+					nl_link_set_flags(nl_sock_ns,
+							  c->pasta_ifi,
+							  IFF_UP, IFF_UP);
+					c->pasta_ifi_up = 1;
+					debug("Brought interface up");
+				}
+				if ((late_binding || c->pasta_ifi_up) &&
+				    !c->no_ndp)
+					ndp_send_init_req(c);
+			}
+		}
+		return;
+	}
+}
+
 /**
  * nl_linkaddr_msg_read() - Parse and log a netlink link/addr message
  * @c:		Execution context
@@ -432,6 +605,36 @@ void nl_linkaddr_notify_handler(struct ctx *c)
 	}
 }
 
+/**
+ * nl_linkaddr_host_handler() - Handle events from host link/addr notifier
+ * @c:		Execution context
+ *
+ * Monitor template interface changes and propagate to namespace
+ */
+void nl_linkaddr_host_handler(struct ctx *c)
+{
+	char buf[NLBUFSIZ];
+
+	for (;;) {
+		ssize_t n = recv(nl_sock_linkaddr_host, buf, sizeof(buf),
+				 MSG_DONTWAIT);
+		struct nlmsghdr *nh = (struct nlmsghdr *)buf;
+
+		if (n < 0) {
+			if (errno == EINTR)
+				continue;
+			if (errno != EAGAIN)
+				info("Host recv() error: %s", strerror_(errno));
+			break;
+		}
+
+		info("Host netlink: received %zd bytes", n);
+
+		for (; NLMSG_OK(nh, n); nh = NLMSG_NEXT(nh, n))
+			nl_linkaddr_host_msg_read(c, nh);
+	}
+}
+
 /**
  * nl_linkaddr_init_do() - Actually create and bind the netlink socket
  * @arg:	Execution context (for namespace entry) or NULL
@@ -464,6 +667,38 @@ static int nl_linkaddr_init_do(void *arg)
 	return 0;
 }
 
+/**
+ * nl_linkaddr_host_init_do() - Create host-side link/addr notifier socket
+ * @arg:	Unused
+ *
+ * Return: 0 on success, -1 on failure
+ */
+static int nl_linkaddr_host_init_do(void *arg)
+{
+	struct sockaddr_nl addr = { .nl_family = AF_NETLINK,
+		.nl_groups = RTMGRP_LINK | RTMGRP_IPV4_IFADDR | RTMGRP_IPV6_IFADDR };
+
+	(void)arg;
+
+	nl_sock_linkaddr_host = socket(AF_NETLINK, SOCK_RAW | SOCK_CLOEXEC,
+				       NETLINK_ROUTE);
+	if (nl_sock_linkaddr_host < 0) {
+		debug("socket() failed for host: %s", strerror_(errno));
+		return -1;
+	}
+
+	if (bind(nl_sock_linkaddr_host, (struct sockaddr *)&addr,
+		 sizeof(addr)) < 0) {
+		debug("bind() failed for host: %s", strerror_(errno));
+		close(nl_sock_linkaddr_host);
+		nl_sock_linkaddr_host = -1;
+		return -1;
+	}
+
+	debug("host socket fd=%d", nl_sock_linkaddr_host);
+	return 0;
+}
+
 /**
  * nl_linkaddr_notify_init() - Initialize link/address change notifier
  * @c:		Execution context
@@ -502,6 +737,33 @@ int nl_linkaddr_notify_init(const struct ctx *c)
 		return -1;
 	}
 
+	debug("namespace socket fd=%d", nl_sock_linkaddr);
+
+	/* In PASTA mode, also create a host-side socket to monitor
+	 * template interface changes
+	 */
+	if (c->mode == MODE_PASTA) {
+		nl_linkaddr_host_init_do(NULL);
+
+		if (nl_sock_linkaddr_host < 0) {
+			warn("Failed to create host link/addr notifier socket");
+			/* Non-fatal - continue without host monitoring */
+		} else {
+			ref.type = EPOLL_TYPE_NL_LINKADDR_HOST;
+			ev.data.u64 = ref.u64;
+			if (epoll_ctl(c->epollfd, EPOLL_CTL_ADD,
+				      nl_sock_linkaddr_host, &ev) == -1) {
+				warn("epoll_ctl() failed on host notifier: %s",
+				     strerror_(errno));
+				close(nl_sock_linkaddr_host);
+				nl_sock_linkaddr_host = -1;
+			} else {
+				info("Host netlink socket fd=%d, pasta_ifn=%s",
+				     nl_sock_linkaddr_host, c->pasta_ifn);
+			}
+		}
+	}
+
 	return 0;
 }
 /**
@@ -1340,6 +1602,58 @@ int nl_addr_set(int s, unsigned int ifi, sa_family_t af,
 	return nl_do(s, &req, RTM_NEWADDR, NLM_F_CREATE | NLM_F_EXCL, len);
 }
 
+/**
+ * nl_addr_del() - Delete IP address from given interface
+ * @s:		Netlink socket
+ * @ifi:	Interface index
+ * @af:		Address family
+ * @addr:	Address to delete
+ * @prefix_len:	Prefix length
+ *
+ * Return: 0 on success, negative error code on failure
+ */
+int nl_addr_del(int s, unsigned int ifi, sa_family_t af,
+		const void *addr, int prefix_len)
+{
+	struct req_t {
+		struct nlmsghdr nlh;
+		struct ifaddrmsg ifa;
+		union {
+			struct {
+				struct rtattr rta_l;
+				struct in_addr l;
+			} a4;
+			struct {
+				struct rtattr rta_l;
+				struct in6_addr l;
+			} a6;
+		} del;
+	} req = {
+		.ifa.ifa_family    = af,
+		.ifa.ifa_index     = ifi,
+		.ifa.ifa_prefixlen = prefix_len,
+	};
+	ssize_t len;
+
+	if (af == AF_INET6) {
+		size_t rta_len = RTA_LENGTH(sizeof(req.del.a6.l));
+
+		len = offsetof(struct req_t, del.a6) + sizeof(req.del.a6);
+		memcpy(&req.del.a6.l, addr, sizeof(req.del.a6.l));
+		req.del.a6.rta_l.rta_len = rta_len;
+		req.del.a6.rta_l.rta_type = IFA_LOCAL;
+	} else {
+		size_t rta_len = RTA_LENGTH(sizeof(req.del.a4.l));
+
+		len = offsetof(struct req_t, del.a4) + sizeof(req.del.a4);
+		memcpy(&req.del.a4.l, addr, sizeof(req.del.a4.l));
+		req.del.a4.rta_l.rta_len = rta_len;
+		req.del.a4.rta_l.rta_type = IFA_LOCAL;
+	}
+
+	return nl_do(s, &req, RTM_DELADDR, 0, len);
+}
+
 /**
  * nl_addr_dup() - Copy IP addresses for given interface and address family
  * @s_src:	Netlink socket in source network namespace
diff --git a/netlink.h b/netlink.h
index 1796a72..f65ae10 100644
--- a/netlink.h
+++ b/netlink.h
@@ -35,5 +35,8 @@ void nl_neigh_notify_handler(const struct ctx *c);
 
 int nl_linkaddr_notify_init(const struct ctx *c);
 void nl_linkaddr_notify_handler(struct ctx *c);
+void nl_linkaddr_host_handler(struct ctx *c);
+int nl_addr_del(int s, unsigned int ifi, sa_family_t af,
+		const void *addr, int prefix_len);
 
 #endif /* NETLINK_H */
diff --git a/passt.c b/passt.c
index f274858..438dac8 100644
--- a/passt.c
+++ b/passt.c
@@ -81,6 +81,7 @@ char *epoll_type_str[] = {
 	[EPOLL_TYPE_REPAIR]		= "TCP_REPAIR helper socket",
 	[EPOLL_TYPE_NL_NEIGH]		= "netlink neighbour notifier socket",
 	[EPOLL_TYPE_NL_LINKADDR]	= "netlink link/address notifier socket",
+	[EPOLL_TYPE_NL_LINKADDR_HOST]	= "netlink host link/address notifier socket",
 };
 static_assert(ARRAY_SIZE(epoll_type_str) == EPOLL_NUM_TYPES,
 	      "epoll_type_str[] doesn't match enum epoll_type");
@@ -308,6 +309,9 @@ static void passt_worker(void *opaque, int nfds, struct epoll_event *events)
 		case EPOLL_TYPE_NL_LINKADDR:
 			nl_linkaddr_notify_handler(c);
 			break;
+		case EPOLL_TYPE_NL_LINKADDR_HOST:
+			nl_linkaddr_host_handler(c);
+			break;
 		default:
 			/* Can't happen */
 			ASSERT(0);
-- 
2.51.1


^ permalink raw reply	[flat|nested] 25+ messages in thread

* [RFC  10/12] netlink: Add host-side route monitoring and propagation
  2025-12-15  1:54 [RFC 00/12] Support for multiple address and late binding Jon Maloy
                   ` (8 preceding siblings ...)
  2025-12-15  1:54 ` [RFC 09/12] netlink: Add host-side monitoring for late template interface binding Jon Maloy
@ 2025-12-15  1:54 ` Jon Maloy
  2025-12-15  1:54 ` [RFC 11/12] netlink: Prevent host route events from overwriting guest-configured gateway Jon Maloy
  2025-12-15  1:54 ` [RFC 12/12] netlink: Rename tap interface when late binding discovers template name Jon Maloy
  11 siblings, 0 replies; 25+ messages in thread
From: Jon Maloy @ 2025-12-15  1:54 UTC (permalink / raw)
  To: sbrivio, dgibson, david, jmaloy, passt-dev

We extend host-side netlink monitoring to also track default route
changes on the template interface and propagate them to the namespace.

- Subscribe to RTMGRP_IPV4_ROUTE and RTMGRP_IPV6_ROUTE groups on the
  host-side netlink socket
- Handle RTM_NEWROUTE/RTM_DELROUTE events for default routes
- Support late binding via routes: if no template interface is bound
  yet, adopt the interface in question when a default route appears
  on it.
- When a default route is added, set guest_gw/our_tap_addr and
  propagate the route to the namespace via nl_route_set_def()
- When a default route is removed, clear guest_gw/our_tap_addr

Signed-off-by: Jon Maloy <jmaloy@redhat.com>
---
 netlink.c | 100 ++++++++++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 97 insertions(+), 3 deletions(-)

diff --git a/netlink.c b/netlink.c
index 583ada8..d049239 100644
--- a/netlink.c
+++ b/netlink.c
@@ -199,7 +199,7 @@ static bool nl_addr6_add(struct ctx *c, const struct in6_addr *addr,
 	idx = c->ip6.addr_count++;
 	c->ip6.addrs[idx].addr = *addr;
 	c->ip6.addrs[idx].prefix_len = prefix_len;
-	c->ip6.addrs[idxyes].permanent = 0;
+	c->ip6.addrs[idx].permanent = 0;
 	return true;
 }
 
@@ -254,7 +254,7 @@ static bool nl_addr6_del(struct ctx *c, const struct in6_addr *addr)
 }
 
 /**
- * nl_linkaddr_host_msg_read() - Handle host-side link/addr changes
+ * nl_linkaddr_host_msg_read() - Handle host-side link/addr/route changes
  * @c:		Execution context
  * @nh:	Netlink message header
  *
@@ -420,6 +420,99 @@ static void nl_linkaddr_host_msg_read(struct ctx *c, const struct nlmsghdr *nh)
 		}
 		return;
 	}
+
+	if (nh->nlmsg_type == RTM_NEWROUTE || nh->nlmsg_type == RTM_DELROUTE) {
+		bool is_new = (nh->nlmsg_type == RTM_NEWROUTE);
+		const struct rtmsg *rtm = NLMSG_DATA(nh);
+		struct rtattr *rta = RTM_RTA(rtm);
+		size_t na = RTM_PAYLOAD(nh);
+		unsigned int template_ifi;
+		char ifname[IFNAMSIZ];
+		unsigned int oif = 0;
+		void *gw = NULL;
+		bool is_default;
+		bool is_match;
+		bool unbound;
+
+		/* Only interested in default routes */
+		if (rtm->rtm_dst_len != 0)
+			return;
+
+		for (; RTA_OK(rta, na); rta = RTA_NEXT(rta, na)) {
+			if (rta->rta_type == RTA_GATEWAY)
+				gw = RTA_DATA(rta);
+			else if (rta->rta_type == RTA_OIF)
+				oif = *(unsigned int *)RTA_DATA(rta);
+		}
+
+		if (!gw || !oif)
+			return;
+
+		/* Get interface name for late binding check */
+		if (!if_indextoname(oif, ifname))
+			return;
+
+		/* Check for late binding conditions */
+		is_default = !strcmp(c->pasta_ifn, pasta_default_ifn);
+		is_match = !strcmp(ifname, c->pasta_ifn);
+
+		if (rtm->rtm_family == AF_INET)
+			template_ifi = c->ifi4;
+		else if (rtm->rtm_family == AF_INET6)
+			template_ifi = c->ifi6;
+		else
+			return;
+
+		unbound = (rtm->rtm_family == AF_INET) ?
+			  (int)c->ifi4 <= 0 : (int)c->ifi6 <= 0;
+
+		if (unbound && (is_default || is_match)) {
+			debug("Late binding (route): using %s as %s template",
+			      ifname,
+			      rtm->rtm_family == AF_INET ? "IPv4" : "IPv6");
+
+			if (rtm->rtm_family == AF_INET) {
+				c->ifi4 = oif;
+				template_ifi = c->ifi4;
+			} else {
+				c->ifi6 = oif;
+				template_ifi = c->ifi6;
+			}
+
+			if (is_default)
+				snprintf(c->pasta_ifn, sizeof(c->pasta_ifn),
+					 "%s", ifname);
+		}
+
+		if (oif != template_ifi)
+			return;
+
+		if (rtm->rtm_family == AF_INET) {
+			char buf[INET_ADDRSTRLEN];
+
+			if (!is_new) {
+				c->ip4.guest_gw = (struct in_addr){ 0 };
+				c->ip4.our_tap_addr = (struct in_addr){ 0 };
+				return;
+			}
+			c->ip4.guest_gw = *(struct in_addr *)gw;
+			c->ip4.our_tap_addr = c->ip4.guest_gw;
+			nl_route_set_def(nl_sock_ns, c->pasta_ifi, AF_INET, gw);
+			inet_ntop(AF_INET, &c->ip4.guest_gw, buf, sizeof(buf));
+			debug("Set IPv4 default route via %s", buf);
+		} else if (rtm->rtm_family == AF_INET6) {
+			char buf[INET6_ADDRSTRLEN];
+
+			if (!is_new) {
+				c->ip6.guest_gw = (struct in6_addr){ 0 };
+				return;
+			}
+			c->ip6.guest_gw = *(struct in6_addr *)gw;
+			nl_route_set_def(nl_sock_ns, c->pasta_ifi, AF_INET6, gw);
+			inet_ntop(AF_INET6, &c->ip6.guest_gw, buf, sizeof(buf));
+			debug("Set IPv6 default route via %s", buf);
+		}
+	}
 }
 
 /**
@@ -676,7 +769,8 @@ static int nl_linkaddr_init_do(void *arg)
 static int nl_linkaddr_host_init_do(void *arg)
 {
 	struct sockaddr_nl addr = { .nl_family = AF_NETLINK,
-		.nl_groups = RTMGRP_LINK | RTMGRP_IPV4_IFADDR | RTMGRP_IPV6_IFADDR };
+		.nl_groups = RTMGRP_LINK | RTMGRP_IPV4_IFADDR | RTMGRP_IPV6_IFADDR |
+			     RTMGRP_IPV4_ROUTE | RTMGRP_IPV6_ROUTE };
 
 	(void)arg;
 
-- 
2.51.1


^ permalink raw reply	[flat|nested] 25+ messages in thread

* [RFC  11/12] netlink: Prevent host route events from overwriting guest-configured gateway
  2025-12-15  1:54 [RFC 00/12] Support for multiple address and late binding Jon Maloy
                   ` (9 preceding siblings ...)
  2025-12-15  1:54 ` [RFC 10/12] netlink: Add host-side route monitoring and propagation Jon Maloy
@ 2025-12-15  1:54 ` Jon Maloy
  2025-12-15  1:54 ` [RFC 12/12] netlink: Rename tap interface when late binding discovers template name Jon Maloy
  11 siblings, 0 replies; 25+ messages in thread
From: Jon Maloy @ 2025-12-15  1:54 UTC (permalink / raw)
  To: sbrivio, dgibson, david, jmaloy, passt-dev

We add a new context flag 'guest_gw_from_ns' to track whether the
gateway was configured from the guest side, which can happen either
via the -g option or through a namespace-side netlink event. When the
flag is set, host-side route events are not permitted to overwrite the
guest's gateway configuration.

This gives any gateway setting from the guest side precedence over any
gateway setting coming from the host side.

Signed-off-by: Jon Maloy <jmaloy@redhat.com>
---
 conf.c    |  2 ++
 netlink.c | 15 +++++++++++++--
 passt.h   |  4 ++++
 3 files changed, 19 insertions(+), 2 deletions(-)

diff --git a/conf.c b/conf.c
index 0a4a28a..f26637c 100644
--- a/conf.c
+++ b/conf.c
@@ -1891,6 +1891,7 @@ void conf(struct ctx *c, int argc, char **argv)
 			    !IN6_IS_ADDR_LOOPBACK(&c->ip6.guest_gw)) {
 				if (c->mode == MODE_PASTA)
 					c->ip6.no_copy_routes = true;
+				c->ip6.guest_gw_from_ns = true;
 				break;
 			}
 
@@ -1900,6 +1901,7 @@ void conf(struct ctx *c, int argc, char **argv)
 			    !IN4_IS_ADDR_LOOPBACK(&c->ip4.guest_gw)) {
 				if (c->mode == MODE_PASTA)
 					c->ip4.no_copy_routes = true;
+				c->ip4.guest_gw_from_ns = true;
 				break;
 			}
 
diff --git a/netlink.c b/netlink.c
index d049239..de04fb7 100644
--- a/netlink.c
+++ b/netlink.c
@@ -490,6 +490,9 @@ static void nl_linkaddr_host_msg_read(struct ctx *c, const struct nlmsghdr *nh)
 		if (rtm->rtm_family == AF_INET) {
 			char buf[INET_ADDRSTRLEN];
 
+			if (c->ip4.guest_gw_from_ns)
+				return;
+
 			if (!is_new) {
 				c->ip4.guest_gw = (struct in_addr){ 0 };
 				c->ip4.our_tap_addr = (struct in_addr){ 0 };
@@ -503,6 +506,9 @@ static void nl_linkaddr_host_msg_read(struct ctx *c, const struct nlmsghdr *nh)
 		} else if (rtm->rtm_family == AF_INET6) {
 			char buf[INET6_ADDRSTRLEN];
 
+			if (c->ip6.guest_gw_from_ns)
+				return;
+
 			if (!is_new) {
 				c->ip6.guest_gw = (struct in6_addr){ 0 };
 				return;
@@ -658,15 +664,20 @@ static void nl_linkaddr_msg_read(struct ctx *c, const struct nlmsghdr *nh)
 			if (is_new) {
 				c->ip4.guest_gw = *(struct in_addr *)gw;
 				c->ip4.our_tap_addr = c->ip4.guest_gw;
+				c->ip4.guest_gw_from_ns = true;
 			} else {
 				c->ip4.guest_gw = (struct in_addr){ 0 };
 				c->ip4.our_tap_addr = (struct in_addr){ 0 };
+				c->ip4.guest_gw_from_ns = false;
 			}
 		} else if (rtm->rtm_family == AF_INET6) {
-			if (is_new)
+			if (is_new) {
 				c->ip6.guest_gw = *(struct in6_addr *)gw;
-			else
+				c->ip6.guest_gw_from_ns = true;
+			} else {
 				c->ip6.guest_gw = (struct in6_addr){ 0 };
+				c->ip6.guest_gw_from_ns = false;
+			}
 		}
 	}
 }
diff --git a/passt.h b/passt.h
index 70ccaf1..5e7bc99 100644
--- a/passt.h
+++ b/passt.h
@@ -82,6 +82,7 @@ enum passt_modes {
  * @ifname_out:		Optional interface name to bind outbound sockets to
  * @no_copy_routes:	Don't copy all routes when configuring target namespace
  * @no_copy_addrs:	Don't copy all addresses when configuring namespace
+ * @guest_gw_from_ns:	Gateway was set from namespace (config or ns event)
  */
 struct ip4_ctx {
 	/* PIF_TAP addresses */
@@ -104,6 +105,7 @@ struct ip4_ctx {
 
 	bool no_copy_routes;
 	bool no_copy_addrs;
+	bool guest_gw_from_ns;
 };
 
 /**
@@ -125,6 +127,7 @@ struct ip4_ctx {
  * @ifname_out:		Optional interface name to bind outbound sockets to
  * @no_copy_routes:	Don't copy all routes when configuring target namespace
  * @no_copy_addrs:	Don't copy all addresses when configuring namespace
+ * @guest_gw_from_ns:	Gateway was set from namespace (config or ns event)
  */
 struct ip6_ctx {
 	/* PIF_TAP addresses */
@@ -148,6 +151,7 @@ struct ip6_ctx {
 
 	bool no_copy_routes;
 	bool no_copy_addrs;
+	bool guest_gw_from_ns;
 };
 
 #include <netinet/if_ether.h>
-- 
2.51.1


^ permalink raw reply	[flat|nested] 25+ messages in thread

* [RFC  12/12] netlink: Rename tap interface when late binding discovers template name
  2025-12-15  1:54 [RFC 00/12] Support for multiple address and late binding Jon Maloy
                   ` (10 preceding siblings ...)
  2025-12-15  1:54 ` [RFC 11/12] netlink: Prevent host route events from overwriting guest-configured gateway Jon Maloy
@ 2025-12-15  1:54 ` Jon Maloy
  11 siblings, 0 replies; 25+ messages in thread
From: Jon Maloy @ 2025-12-15  1:54 UTC (permalink / raw)
  To: sbrivio, dgibson, david, jmaloy, passt-dev

When pasta starts without a template interface (-I), it creates the tap
device with the default name (tap0). Later, when late binding discovers
the actual template interface, we now rename the device to match it.

This ensures the namespace interface has the expected name, matching
the discovered host interface.

Key changes:
- Add nl_link_rename() function to rename network interfaces
- When late binding triggers with the default TAP name, bring it
  down, rename it to match the discovered interface, then bring it up

Signed-off-by: Jon Maloy <jmaloy@redhat.com>
---
 netlink.c | 37 +++++++++++++++++++++++++++++++++++++
 tap.c     | 27 +++++++++++++++++++++++++++
 tap.h     |  1 +
 3 files changed, 65 insertions(+)

diff --git a/netlink.c b/netlink.c
index de04fb7..71089ab 100644
--- a/netlink.c
+++ b/netlink.c
@@ -44,6 +44,8 @@
 /* Default namespace interface name from conf.c */
 extern const char *pasta_default_ifn;
 
+static int nl_link_rename(int s, unsigned int ifi, const char *name);
+
 /* Same as RTA_NEXT() but for nexthops: RTNH_NEXT() doesn't take 'attrlen' */
 #define RTNH_NEXT_AND_DEC(rtnh, attrlen)				\
 	((attrlen) -= RTNH_ALIGN((rtnh)->rtnh_len), RTNH_NEXT(rtnh))
@@ -315,6 +317,14 @@ static void nl_linkaddr_host_msg_read(struct ctx *c, const struct nlmsghdr *nh)
 			}
 			late_binding = true;
 
+			/* Rename interface if it is still using default name */
+			if (is_default && strcmp(ifname, pasta_default_ifn)) {
+				nl_link_set_flags(nl_sock_ns, c->pasta_ifi,
+						  0, IFF_UP);
+				nl_link_rename(nl_sock_ns, c->pasta_ifi, ifname);
+				debug("Renamed tap: %s -> %s",
+				      pasta_default_ifn, ifname);
+			}
 			if (is_default)
 				snprintf(c->pasta_ifn, sizeof(c->pasta_ifn),
 					 "%s", ifname);
@@ -1943,6 +1953,33 @@ int nl_link_set_flags(int s, unsigned int ifi,
 	return nl_do(s, &req, RTM_NEWLINK, 0, sizeof(req));
 }
 
+/**
+ * nl_link_rename() - Rename a network interface
+ * @s:		Netlink socket
+ * @ifi:	Interface index
+ * @name:	New interface name
+ *
+ * Return: 0 on success, negative error code on failure
+ */
+static int nl_link_rename(int s, unsigned int ifi, const char *name)
+{
+	struct req_t {
+		struct nlmsghdr nlh;
+		struct ifinfomsg ifm;
+		struct rtattr rta;
+		char name[IFNAMSIZ];
+	} req = {
+		.ifm.ifi_family	  = AF_UNSPEC,
+		.ifm.ifi_index	  = ifi,
+		.rta.rta_type	  = IFLA_IFNAME,
+		.rta.rta_len	  = RTA_LENGTH(IFNAMSIZ),
+	};
+
+	snprintf(req.name, IFNAMSIZ, "%s", name);
+
+	return nl_do(s, &req, RTM_NEWLINK, 0, sizeof(req));
+}
+
 /**
  * nl_neigh_msg_read() - Interpret a neighbour state message from netlink
  * @c:		Execution context
diff --git a/tap.c b/tap.c
index a2a4459..cd59160 100644
--- a/tap.c
+++ b/tap.c
@@ -1502,6 +1502,33 @@ static void tap_sock_tun_init(struct ctx *c)
 	tap_start_connection(c);
 }
 
+/**
+ * tap_backend_init_late() - Create tap device for late binding
+ * @c:		Execution context
+ *
+ * Called when late binding discovers the template interface name.
+ * Creates the TAP device with the discovered name.
+ */
+void tap_backend_init_late(struct ctx *c)
+{
+	if (c->mode != MODE_PASTA || c->fd_tap != -1)
+		return;
+
+	if (!*c->pasta_ifn) {
+		warn("%s called with empty pasta_ifn", __func__);
+		return;
+	}
+
+	NS_CALL(tap_ns_tun, c);
+	if (c->fd_tap == -1) {
+		err("Failed to set up tap device in namespace");
+		return;
+	}
+
+	tap_start_connection(c);
+	info("Created tap device %s for late binding", c->pasta_ifn);
+}
+
 /**
  * tap_sock_update_pool() - Set the buffer base and size for the pool of packets
  * @base:	Buffer base
diff --git a/tap.h b/tap.h
index ee22a9d..b41eae9 100644
--- a/tap.h
+++ b/tap.h
@@ -118,6 +118,7 @@ void tap_handler_passt(struct ctx *c, uint32_t events,
 int tap_sock_unix_open(char *sock_path);
 void tap_sock_reset(struct ctx *c);
 void tap_backend_init(struct ctx *c);
+void tap_backend_init_late(struct ctx *c);
 void tap_flush_pools(void);
 void tap_handler(struct ctx *c, const struct timespec *now);
 void tap_add_packet(struct ctx *c, struct iov_tail *data,
-- 
2.51.1


^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [RFC  01/12] ip: Introduce multi-address data structures for IPv4 and IPv6
  2025-12-15  1:54 ` [RFC 01/12] ip: Introduce multi-address data structures for IPv4 and IPv6 Jon Maloy
@ 2025-12-15  9:40   ` David Gibson
  2025-12-15 22:05     ` Jon Maloy
  2025-12-15  9:46   ` David Gibson
  1 sibling, 1 reply; 25+ messages in thread
From: David Gibson @ 2025-12-15  9:40 UTC (permalink / raw)
  To: Jon Maloy; +Cc: sbrivio, dgibson, passt-dev

[-- Attachment #1: Type: text/plain, Size: 20516 bytes --]

On Sun, Dec 14, 2025 at 08:54:30PM -0500, Jon Maloy wrote:
> As preparation for supporting multiple addresses per interface, we
> replace the single addr/prefix_len fields with arrays.
> 
> - We add an ip4_addr_entry and an ip6_addr_entry struct containing
>   address and prefix length.
> 
> - We set the array sizes to IP4_MAX_ADDRS=8 and IP6_MAX_ADDRS=16,
>   respectively.
> 
> The only functional change is that the IPv6 prefix length now is
> properly stored instead of being hardcoded to 64 even when set
> via the -a option.
> 
> Signed-off-by: Jon Maloy <jmaloy@redhat.com>
> ---
>  arp.c    |  4 +--
>  conf.c   | 97 +++++++++++++++++++++++++++++++++-----------------------
>  dhcp.c   |  8 ++---
>  dhcpv6.c |  6 ++--
>  fwd.c    | 12 +++----
>  ip.h     | 26 +++++++++++++++
>  ndp.c    |  6 ++--
>  passt.h  | 16 ++++++----
>  pasta.c  | 12 ++++---
>  tap.c    |  4 +--
>  util.h   |  1 +
>  11 files changed, 122 insertions(+), 70 deletions(-)
> 
> diff --git a/arp.c b/arp.c
> index bb042e9..7eaf517 100644
> --- a/arp.c
> +++ b/arp.c
> @@ -54,7 +54,7 @@ static bool ignore_arp(const struct ctx *c,
>  		return true;
>  
>  	/* Don't resolve the guest's assigned address, either. */
> -	if (!memcmp(am->tip, &c->ip4.addr, sizeof(am->tip)))
> +	if (!memcmp(am->tip, &c->ip4.addrs[0].addr, sizeof(am->tip)))
>  		return true;
>  
>  	return false;
> @@ -145,7 +145,7 @@ void arp_send_init_req(const struct ctx *c)
>  	memcpy(req.am.sha,	c->our_tap_mac,		sizeof(req.am.sha));
>  	memcpy(req.am.sip,	&c->ip4.our_tap_addr,	sizeof(req.am.sip));
>  	memcpy(req.am.tha,	MAC_BROADCAST,		sizeof(req.am.tha));
> -	memcpy(req.am.tip,	&c->ip4.addr,		sizeof(req.am.tip));
> +	memcpy(req.am.tip,	&c->ip4.addrs[0].addr,	sizeof(req.am.tip));
>  
>  	debug("Sending initial ARP request for guest MAC address");
>  	tap_send_single(c, &req, sizeof(req));
> diff --git a/conf.c b/conf.c
> index fdc19e8..0e96f36 100644
> --- a/conf.c
> +++ b/conf.c
> @@ -694,10 +694,12 @@ static int conf_ip4_prefix(const char *arg)
>   * conf_ip4() - Verify or detect IPv4 support, get relevant addresses
>   * @ifi:	Host interface to attempt (0 to determine one)
>   * @ip4:	IPv4 context (will be written)
> + * @permanent:	Mark configured addresses as permanent
>   *
>   * Return: interface index for IPv4, or 0 on failure.
>   */
> -static unsigned int conf_ip4(unsigned int ifi, struct ip4_ctx *ip4)
> +static unsigned int conf_ip4(unsigned int ifi, struct ip4_ctx *ip4,
> +			     bool permanent)
>  {
>  	if (!ifi)
>  		ifi = nl_get_ext_if(nl_sock, AF_INET);
> @@ -717,33 +719,38 @@ static unsigned int conf_ip4(unsigned int ifi, struct ip4_ctx *ip4)
>  		}
>  	}
>  
> -	if (IN4_IS_ADDR_UNSPECIFIED(&ip4->addr)) {
> +	if (!ip4->addr_count) {
>  		int rc = nl_addr_get(nl_sock, ifi, AF_INET,
> -				     &ip4->addr, &ip4->prefix_len, NULL);
> +				     &ip4->addrs[0].addr,
> +				     &ip4->addrs[0].prefix_len, NULL);
>  		if (rc < 0) {
>  			debug("Couldn't discover IPv4 address: %s",
>  			      strerror_(-rc));
>  			return 0;
>  		}
> +		ip4->addrs[0].permanent = permanent;
> +		ip4->addr_count = 1;
>  	}
>  
> -	if (!ip4->prefix_len) {
> -		in_addr_t addr = ntohl(ip4->addr.s_addr);
> -		if (IN_CLASSA(addr))
> -			ip4->prefix_len = (32 - IN_CLASSA_NSHIFT);
> -		else if (IN_CLASSB(addr))
> -			ip4->prefix_len = (32 - IN_CLASSB_NSHIFT);
> -		else if (IN_CLASSC(addr))
> -			ip4->prefix_len = (32 - IN_CLASSC_NSHIFT);
> +	/* Apply default prefix_len to first address if not set */
> +	if (!ip4->addrs[0].prefix_len) {
> +		in_addr_t a = ntohl(ip4->addrs[0].addr.s_addr);
> +
> +		if (IN_CLASSA(a))
> +			ip4->addrs[0].prefix_len = 8;
> +		else if (IN_CLASSB(a))
> +			ip4->addrs[0].prefix_len = 16;
> +		else if (IN_CLASSC(a))
> +			ip4->addrs[0].prefix_len = 24;
>  		else
> -			ip4->prefix_len = 32;
> +			ip4->addrs[0].prefix_len = 32;
>  	}
>  
> -	ip4->addr_seen = ip4->addr;
> +	ip4->addr_seen = ip4->addrs[0].addr;
>  
>  	ip4->our_tap_addr = ip4->guest_gw;
>  
> -	if (IN4_IS_ADDR_UNSPECIFIED(&ip4->addr))
> +	if (IN4_IS_ADDR_UNSPECIFIED(&ip4->addrs[0].addr))
>  		return 0;
>  
>  	return ifi;
> @@ -755,9 +762,9 @@ static unsigned int conf_ip4(unsigned int ifi, struct ip4_ctx *ip4)
>   */
>  static void conf_ip4_local(struct ip4_ctx *ip4)
>  {
> -	ip4->addr_seen = ip4->addr = IP4_LL_GUEST_ADDR;
> +	ip4->addr_seen = ip4->addrs[0].addr = IP4_LL_GUEST_ADDR;
>  	ip4->our_tap_addr = ip4->guest_gw = IP4_LL_GUEST_GW;
> -	ip4->prefix_len = IP4_LL_PREFIX_LEN;
> +	ip4->addrs[0].prefix_len = IP4_LL_PREFIX_LEN;
>  
>  	ip4->no_copy_addrs = ip4->no_copy_routes = true;
>  }
> @@ -766,10 +773,12 @@ static void conf_ip4_local(struct ip4_ctx *ip4)
>   * conf_ip6() - Verify or detect IPv6 support, get relevant addresses
>   * @ifi:	Host interface to attempt (0 to determine one)
>   * @ip6:	IPv6 context (will be written)
> + * @permanent:	Mark discovered addresses as permanent
>   *
>   * Return: interface index for IPv6, or 0 on failure.
>   */
> -static unsigned int conf_ip6(unsigned int ifi, struct ip6_ctx *ip6)
> +static unsigned int conf_ip6(unsigned int ifi, struct ip6_ctx *ip6,
> +			     bool permanent)
>  {
>  	int prefix_len = 0;
>  	int rc;
> @@ -792,19 +801,25 @@ static unsigned int conf_ip6(unsigned int ifi, struct ip6_ctx *ip6)
>  	}
>  
>  	rc = nl_addr_get(nl_sock, ifi, AF_INET6,
> -			 IN6_IS_ADDR_UNSPECIFIED(&ip6->addr) ? &ip6->addr : NULL,
> +			 ip6->addr_count ? NULL : &ip6->addrs[0].addr,
>  			 &prefix_len, &ip6->our_tap_ll);
>  	if (rc < 0) {
>  		debug("Couldn't discover IPv6 address: %s", strerror_(-rc));
>  		return 0;
>  	}
>  
> -	ip6->addr_seen = ip6->addr;
> +	if (!ip6->addr_count) {
> +		ip6->addrs[0].prefix_len = prefix_len ? prefix_len : 64;
> +		ip6->addrs[0].permanent = permanent;
> +		ip6->addr_count = 1;
> +	}
> +
> +	ip6->addr_seen = ip6->addrs[0].addr;
>  
>  	if (IN6_IS_ADDR_LINKLOCAL(&ip6->guest_gw))
>  		ip6->our_tap_ll = ip6->guest_gw;
>  
> -	if (IN6_IS_ADDR_UNSPECIFIED(&ip6->addr) ||
> +	if (IN6_IS_ADDR_UNSPECIFIED(&ip6->addrs[0].addr) ||
>  	    IN6_IS_ADDR_UNSPECIFIED(&ip6->our_tap_ll))
>  		return 0;
>  
> @@ -1149,11 +1164,13 @@ static void conf_print(const struct ctx *c)
>  		if (!c->no_dhcp) {
>  			uint32_t mask;
>  
> -			mask = htonl(0xffffffff << (32 - c->ip4.prefix_len));
> +			mask = htonl(0xffffffff <<
> +				     (32 - c->ip4.addrs[0].prefix_len));
>  
>  			info("DHCP:");
>  			info("    assign: %s",
> -			     inet_ntop(AF_INET, &c->ip4.addr, buf4, sizeof(buf4)));
> +			     inet_ntop(AF_INET, &c->ip4.addrs[0].addr,
> +				       buf4, sizeof(buf4)));
>  			info("    mask: %s",
>  			     inet_ntop(AF_INET, &mask,        buf4, sizeof(buf4)));
>  			info("    router: %s",
> @@ -1191,7 +1208,8 @@ static void conf_print(const struct ctx *c)
>  			goto dns6;
>  
>  		info("    assign: %s",
> -		     inet_ntop(AF_INET6, &c->ip6.addr, buf6, sizeof(buf6)));
> +		     inet_ntop(AF_INET6, &c->ip6.addrs[0].addr,
> +			       buf6, sizeof(buf6)));
>  		info("    router: %s",
>  		     inet_ntop(AF_INET6, &c->ip6.guest_gw, buf6, sizeof(buf6)));
>  		info("    our link-local: %s",
> @@ -1812,22 +1830,23 @@ void conf(struct ctx *c, int argc, char **argv)
>  			break;
>  		}
>  		case 'a':
> -			if (inet_pton(AF_INET6, optarg, &c->ip6.addr)	&&
> -			    !IN6_IS_ADDR_UNSPECIFIED(&c->ip6.addr)	&&
> -			    !IN6_IS_ADDR_LOOPBACK(&c->ip6.addr)		&&
> -			    !IN6_IS_ADDR_V4MAPPED(&c->ip6.addr)		&&
> -			    !IN6_IS_ADDR_V4COMPAT(&c->ip6.addr)		&&
> -			    !IN6_IS_ADDR_MULTICAST(&c->ip6.addr)) {
> +			if (inet_pton(AF_INET6, optarg,
> +				      &c->ip6.addrs[0].addr) &&
> +			    !IN6_IS_ADDR_UNSPECIFIED(&c->ip6.addrs[0].addr) &&
> +			    !IN6_IS_ADDR_LOOPBACK(&c->ip6.addrs[0].addr) &&
> +			    !IN6_IS_ADDR_V4MAPPED(&c->ip6.addrs[0].addr) &&
> +			    !IN6_IS_ADDR_V4COMPAT(&c->ip6.addrs[0].addr) &&
> +			    !IN6_IS_ADDR_MULTICAST(&c->ip6.addrs[0].addr)) {
>  				if (c->mode == MODE_PASTA)
>  					c->ip6.no_copy_addrs = true;
>  				break;
>  			}
>  
> -			if (inet_pton(AF_INET, optarg, &c->ip4.addr)	&&
> -			    !IN4_IS_ADDR_UNSPECIFIED(&c->ip4.addr)	&&
> -			    !IN4_IS_ADDR_BROADCAST(&c->ip4.addr)	&&
> -			    !IN4_IS_ADDR_LOOPBACK(&c->ip4.addr)		&&
> -			    !IN4_IS_ADDR_MULTICAST(&c->ip4.addr)) {
> +			if (inet_pton(AF_INET, optarg, &c->ip4.addrs[0].addr) &&
> +			    !IN4_IS_ADDR_UNSPECIFIED(&c->ip4.addrs[0].addr) &&
> +			    !IN4_IS_ADDR_BROADCAST(&c->ip4.addrs[0].addr) &&
> +			    !IN4_IS_ADDR_LOOPBACK(&c->ip4.addrs[0].addr) &&
> +			    !IN4_IS_ADDR_MULTICAST(&c->ip4.addrs[0].addr)) {
>  				if (c->mode == MODE_PASTA)
>  					c->ip4.no_copy_addrs = true;
>  				break;
> @@ -1836,8 +1855,8 @@ void conf(struct ctx *c, int argc, char **argv)
>  			die("Invalid address: %s", optarg);
>  			break;
>  		case 'n':
> -			c->ip4.prefix_len = conf_ip4_prefix(optarg);
> -			if (c->ip4.prefix_len < 0)
> +			c->ip4.addrs[0].prefix_len = conf_ip4_prefix(optarg);
> +			if (c->ip4.addrs[0].prefix_len < 0)
>  				die("Invalid netmask: %s", optarg);
>  
>  			break;
> @@ -1984,9 +2003,9 @@ void conf(struct ctx *c, int argc, char **argv)
>  
>  	nl_sock_init(c, false);
>  	if (!v6_only)
> -		c->ifi4 = conf_ip4(ifi4, &c->ip4);
> +		c->ifi4 = conf_ip4(ifi4, &c->ip4, c->pasta_conf_ns);
>  	if (!v4_only)
> -		c->ifi6 = conf_ip6(ifi6, &c->ip6);
> +		c->ifi6 = conf_ip6(ifi6, &c->ip6, c->pasta_conf_ns);
>  
>  	if (c->ifi4 && c->mtu < IPV4_MIN_MTU) {
>  		warn("MTU %"PRIu16" is too small for IPv4 (minimum %u)",
> @@ -2125,7 +2144,7 @@ void conf(struct ctx *c, int argc, char **argv)
>  	if (!c->ifi6) {
>  		c->no_ndp = 1;
>  		c->no_dhcpv6 = 1;
> -	} else if (IN6_IS_ADDR_UNSPECIFIED(&c->ip6.addr)) {
> +	} else if (IN6_IS_ADDR_UNSPECIFIED(&c->ip6.addrs[0].addr)) {
>  		c->no_dhcpv6 = 1;
>  	}
>  
> diff --git a/dhcp.c b/dhcp.c
> index 6b9c2e3..46ef8e3 100644
> --- a/dhcp.c
> +++ b/dhcp.c
> @@ -352,7 +352,7 @@ int dhcp(const struct ctx *c, struct iov_tail *data)
>  	reply.secs		= 0;
>  	reply.flags		= m->flags;
>  	reply.ciaddr		= m->ciaddr;
> -	reply.yiaddr		= c->ip4.addr;
> +	reply.yiaddr		= c->ip4.addrs[0].addr;
>  	reply.siaddr		= 0;
>  	reply.giaddr		= m->giaddr;
>  	memcpy(&reply.chaddr,	m->chaddr,	sizeof(reply.chaddr));
> @@ -404,7 +404,7 @@ int dhcp(const struct ctx *c, struct iov_tail *data)
>  
>  	info("    from %s", eth_ntop(m->chaddr, macstr, sizeof(macstr)));
>  
> -	mask.s_addr = htonl(0xffffffff << (32 - c->ip4.prefix_len));
> +	mask.s_addr = htonl(0xffffffff << (32 - c->ip4.addrs[0].prefix_len));
>  	memcpy(opts[1].s,  &mask,                sizeof(mask));
>  	memcpy(opts[3].s,  &c->ip4.guest_gw,     sizeof(c->ip4.guest_gw));
>  	memcpy(opts[54].s, &c->ip4.our_tap_addr, sizeof(c->ip4.our_tap_addr));
> @@ -412,7 +412,7 @@ int dhcp(const struct ctx *c, struct iov_tail *data)
>  	/* If the gateway is not on the assigned subnet, send an option 121
>  	 * (Classless Static Routing) adding a dummy route to it.
>  	 */
> -	if ((c->ip4.addr.s_addr & mask.s_addr)
> +	if ((c->ip4.addrs[0].addr.s_addr & mask.s_addr)
>  	    != (c->ip4.guest_gw.s_addr & mask.s_addr)) {
>  		/* a.b.c.d/32:0.0.0.0, 0:a.b.c.d */
>  		opts[121].slen = 14;
> @@ -469,7 +469,7 @@ int dhcp(const struct ctx *c, struct iov_tail *data)
>  	if (m->flags & FLAG_BROADCAST)
>  		dst = in4addr_broadcast;
>  	else
> -		dst = c->ip4.addr;
> +		dst = c->ip4.addrs[0].addr;
>  
>  	tap_udp4_send(c, c->ip4.our_tap_addr, 67, dst, 68, &reply, dlen);
>  
> diff --git a/dhcpv6.c b/dhcpv6.c
> index e4df0db..7eae6a1 100644
> --- a/dhcpv6.c
> +++ b/dhcpv6.c
> @@ -625,7 +625,7 @@ int dhcpv6(struct ctx *c, struct iov_tail *data,
>  		if (mh->type == TYPE_CONFIRM && server_id)
>  			return -1;
>  
> -		if (dhcpv6_ia_notonlink(data, &c->ip6.addr)) {
> +		if (dhcpv6_ia_notonlink(data, &c->ip6.addrs[0].addr)) {
>  
>  			dhcpv6_send_ia_notonlink(c, data, &client_id_base,
>  						 ntohs(client_id->l), mh->xid);
> @@ -679,7 +679,7 @@ int dhcpv6(struct ctx *c, struct iov_tail *data,
>  
>  	tap_udp6_send(c, src, 547, tap_ip6_daddr(c, src), 546,
>  		      mh->xid, &resp, n);
> -	c->ip6.addr_seen = c->ip6.addr;
> +	c->ip6.addr_seen = c->ip6.addrs[0].addr;
>  
>  	return 1;
>  }
> @@ -703,5 +703,5 @@ void dhcpv6_init(const struct ctx *c)
>  	memcpy(resp_not_on_link.server_id.duid_lladdr,
>  	       c->our_tap_mac, sizeof(c->our_tap_mac));
>  
> -	resp.ia_addr.addr	= c->ip6.addr;
> +	resp.ia_addr.addr	= c->ip6.addrs[0].addr;
>  }
> diff --git a/fwd.c b/fwd.c
> index 44a0e10..408af30 100644
> --- a/fwd.c
> +++ b/fwd.c
> @@ -516,7 +516,7 @@ static bool fwd_guest_accessible4(const struct ctx *c,
>  	/* For IPv4, addr_seen is initialised to addr, so is always a valid
>  	 * address
>  	 */
> -	if (IN4_ARE_ADDR_EQUAL(addr, &c->ip4.addr) ||
> +	if (IN4_ARE_ADDR_EQUAL(addr, &c->ip4.addrs[0].addr) ||
>  	    IN4_ARE_ADDR_EQUAL(addr, &c->ip4.addr_seen))
>  		return false;
>  
> @@ -537,7 +537,7 @@ static bool fwd_guest_accessible6(const struct ctx *c,
>  	if (IN6_IS_ADDR_LOOPBACK(addr))
>  		return false;
>  
> -	if (IN6_ARE_ADDR_EQUAL(addr, &c->ip6.addr))
> +	if (IN6_ARE_ADDR_EQUAL(addr, &c->ip6.addrs[0].addr))
>  		return false;
>  
>  	/* For IPv6, addr_seen starts unspecified, because we don't know what LL
> @@ -587,9 +587,9 @@ static void nat_outbound(const struct ctx *c, const union inany_addr *addr,
>  	else if (inany_equals6(addr, &c->ip6.map_host_loopback))
>  		*translated = inany_loopback6;
>  	else if (inany_equals4(addr, &c->ip4.map_guest_addr))
> -		*translated = inany_from_v4(c->ip4.addr);
> +		*translated = inany_from_v4(c->ip4.addrs[0].addr);
>  	else if (inany_equals6(addr, &c->ip6.map_guest_addr))
> -		translated->a6 = c->ip6.addr;
> +		translated->a6 = c->ip6.addrs[0].addr;
>  	else
>  		*translated = *addr;
>  }
> @@ -710,10 +710,10 @@ bool nat_inbound(const struct ctx *c, const union inany_addr *addr,
>  		   inany_equals6(addr, &in6addr_loopback)) {
>  		translated->a6 = c->ip6.map_host_loopback;
>  	} else if (!IN4_IS_ADDR_UNSPECIFIED(&c->ip4.map_guest_addr) &&
> -		   inany_equals4(addr, &c->ip4.addr)) {
> +		   inany_equals4(addr, &c->ip4.addrs[0].addr)) {
>  		*translated = inany_from_v4(c->ip4.map_guest_addr);
>  	} else if (!IN6_IS_ADDR_UNSPECIFIED(&c->ip6.map_guest_addr) &&
> -		   inany_equals6(addr, &c->ip6.addr)) {
> +		   inany_equals6(addr, &c->ip6.addrs[0].addr)) {
>  		translated->a6 = c->ip6.map_guest_addr;
>  	} else if (fwd_guest_accessible(c, addr)) {
>  		*translated = *addr;
> diff --git a/ip.h b/ip.h
> index 5830b92..748cb1f 100644
> --- a/ip.h
> +++ b/ip.h
> @@ -135,4 +135,30 @@ static const struct in_addr in4addr_broadcast = { 0xffffffff };
>  #define IPV6_MIN_MTU		1280
>  #endif
>  
> +/* Maximum number of addresses per address family */
> +#define IP4_MAX_ADDRS		8
> +#define IP6_MAX_ADDRS		16
> +
> +/**
> + * struct ip4_addr_entry - IPv4 address with prefix length
> + * @addr:		IPv4 address
> + * @prefix_len:		Prefix length (netmask bits)
> + */
> +struct ip4_addr_entry {
> +	struct in_addr addr;
> +	int prefix_len;
> +	int permanent;

Might as well make these uint8_t and bool, respectively.  There will
be some padding, but the overall structure will still be smaller.

Or, it might be worth considering replacing 'permanent' with a flags
mask, in case we have future uses for it.

> +};
> +
> +/**
> + * struct ip6_addr_entry - IPv6 address with prefix length
> + * @addr:		IPv6 address
> + * @prefix_len:		Prefix length
> + */
> +struct ip6_addr_entry {
> +	struct in6_addr addr;
> +	int prefix_len;
> +	int permanent;

Ditto.

> +};
> +
>  #endif /* IP_H */
> diff --git a/ndp.c b/ndp.c
> index eb9e313..868a234 100644
> --- a/ndp.c
> +++ b/ndp.c
> @@ -257,7 +257,7 @@ static void ndp_ra(const struct ctx *c, const struct in6_addr *dst)
>  			.valid_lifetime		= ~0U,
>  			.pref_lifetime		= ~0U,
>  		},
> -		.prefix = c->ip6.addr,
> +		.prefix = c->ip6.addrs[0].addr,
>  		.source_ll = {
>  			.header = {
>  				.type		= OPT_SRC_L2_ADDR,
> @@ -466,8 +466,8 @@ void ndp_send_init_req(const struct ctx *c)
>  			.icmp6_solicited	= 0, /* Reserved */
>  			.icmp6_override		= 0, /* Reserved */
>  		},
> -		.target_addr = c->ip6.addr
> +		.target_addr = c->ip6.addrs[0].addr
>  	};
>  	debug("Sending initial NDP NS request for guest MAC address");
> -	ndp_send(c, &c->ip6.addr, &ns, sizeof(ns));
> +	ndp_send(c, &c->ip6.addrs[0].addr, &ns, sizeof(ns));
>  }
> diff --git a/passt.h b/passt.h
> index 79d01dd..533f2cb 100644
> --- a/passt.h
> +++ b/passt.h
> @@ -66,9 +66,9 @@ enum passt_modes {
>  
>  /**
>   * struct ip4_ctx - IPv4 execution context
> - * @addr:		IPv4 address assigned to guest
> + * @addrs:		IPv4 addresses assigned to guest
> + * @addr_count:		Number of addresses in addrs[] array
>   * @addr_seen:		Latest IPv4 address seen as source from tap
> - * @prefixlen:		IPv4 prefix length (netmask)
>   * @guest_gw:		IPv4 gateway as seen by the guest
>   * @map_host_loopback:	Outbound connections to this address are NATted to the
>   *                      host's 127.0.0.1
> @@ -85,9 +85,10 @@ enum passt_modes {
>   */
>  struct ip4_ctx {
>  	/* PIF_TAP addresses */
> -	struct in_addr addr;
> +	struct ip4_addr_entry addrs[IP4_MAX_ADDRS];
> +	int addr_count;
> +
>  	struct in_addr addr_seen;
> -	int prefix_len;
>  	struct in_addr guest_gw;
>  	struct in_addr map_host_loopback;
>  	struct in_addr map_guest_addr;
> @@ -107,7 +108,8 @@ struct ip4_ctx {
>  
>  /**
>   * struct ip6_ctx - IPv6 execution context
> - * @addr:		IPv6 address assigned to guest
> + * @addrs:		IPv6 addresses assigned to guest
> + * @addr_count:		Number of addresses in addrs[] array
>   * @addr_seen:		Latest IPv6 global/site address seen as source from tap
>   * @addr_ll_seen:	Latest IPv6 link-local address seen as source from tap
>   * @guest_gw:		IPv6 gateway as seen by the guest
> @@ -126,7 +128,9 @@ struct ip4_ctx {
>   */
>  struct ip6_ctx {
>  	/* PIF_TAP addresses */
> -	struct in6_addr addr;
> +	struct ip6_addr_entry addrs[IP6_MAX_ADDRS];
> +	int addr_count;
> +
>  	struct in6_addr addr_seen;
>  	struct in6_addr addr_ll_seen;
>  	struct in6_addr guest_gw;
> diff --git a/pasta.c b/pasta.c
> index 674b554..49b393c 100644
> --- a/pasta.c
> +++ b/pasta.c
> @@ -331,8 +331,8 @@ void pasta_ns_conf(struct ctx *c)
>  			if (c->ip4.no_copy_addrs) {
>  				rc = nl_addr_set(nl_sock_ns, c->pasta_ifi,
>  						 AF_INET,
> -						 &c->ip4.addr,
> -						 c->ip4.prefix_len);
> +						 &c->ip4.addrs[0].addr,
> +						 c->ip4.addrs[0].prefix_len);
>  			} else {
>  				rc = nl_addr_dup(nl_sock, c->ifi4,
>  						 nl_sock_ns, c->pasta_ifi,
> @@ -378,10 +378,12 @@ void pasta_ns_conf(struct ctx *c)
>  					  0, IFF_NOARP);
>  
>  			if (c->ip6.no_copy_addrs) {
> -				if (!IN6_IS_ADDR_UNSPECIFIED(&c->ip6.addr)) {
> +				struct in6_addr *a = &c->ip6.addrs[0].addr;
> +
> +				if (!IN6_IS_ADDR_UNSPECIFIED(a)) {
>  					rc = nl_addr_set(nl_sock_ns,
> -							 c->pasta_ifi, AF_INET6,
> -							 &c->ip6.addr, 64);
> +							 c->pasta_ifi,
> +							 AF_INET6, a, 64);
>  				}
>  			} else {
>  				rc = nl_addr_dup(nl_sock, c->ifi6,
> diff --git a/tap.c b/tap.c
> index e3ea61c..0b96cc1 100644
> --- a/tap.c
> +++ b/tap.c
> @@ -951,8 +951,8 @@ resume:
>  				c->ip6.addr_seen = *saddr;
>  			}
>  
> -			if (IN6_IS_ADDR_UNSPECIFIED(&c->ip6.addr))
> -				c->ip6.addr = *saddr;
> +			if (IN6_IS_ADDR_UNSPECIFIED(&c->ip6.addrs[0].addr))
> +				c->ip6.addrs[0].addr = *saddr;
>  		} else if (!IN6_IS_ADDR_UNSPECIFIED(saddr)){
>  			c->ip6.addr_seen = *saddr;
>  		}
> diff --git a/util.h b/util.h
> index f7a941f..4273e0d 100644
> --- a/util.h
> +++ b/util.h
> @@ -401,4 +401,5 @@ static inline int wrap_getsockname(int sockfd, struct sockaddr *addr,
>  #define PASST_MAXDNAME 254 /* 253 (RFC 1035) + 1 (the terminator) */
>  void encode_domain_name(char *buf, const char *domain_name);
>  
> +
>  #endif /* UTIL_H */
> -- 
> 2.51.1
> 

-- 
David Gibson (he or they)	| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you, not the other way
				| around.
http://www.ozlabs.org/~dgibson

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [RFC  02/12] ip: Add ip4_default_prefix_len() helper function for class-based prefix
  2025-12-15  1:54 ` [RFC 02/12] ip: Add ip4_default_prefix_len() helper function for class-based prefix Jon Maloy
@ 2025-12-15  9:41   ` David Gibson
  0 siblings, 0 replies; 25+ messages in thread
From: David Gibson @ 2025-12-15  9:41 UTC (permalink / raw)
  To: Jon Maloy; +Cc: sbrivio, dgibson, passt-dev

[-- Attachment #1: Type: text/plain, Size: 2968 bytes --]

On Sun, Dec 14, 2025 at 08:54:31PM -0500, Jon Maloy wrote:
> We add a helper function to calculate the default IPv4 prefix length
> based on address class. This is used to replace the current inline
> calculation in conf_ip4(), and is also a preparation for more uses
> of this functionality in the coming commits.
> 
> Signed-off-by: Jon Maloy <jmaloy@redhat.com>

Reviewed-by: David Gibson <david@gibson.dropbear.id.au>

Not a complaint about this patch, since it's just code motion, but the
whole idea of network classes is obsolete these days.  I wonder if we
should deprecate automatic picking of prefix_len based on class.

> ---
>  conf.c | 15 +++------------
>  ip.c   | 21 +++++++++++++++++++++
>  ip.h   |  2 ++
>  3 files changed, 26 insertions(+), 12 deletions(-)
> 
> diff --git a/conf.c b/conf.c
> index 0e96f36..31acc20 100644
> --- a/conf.c
> +++ b/conf.c
> @@ -733,18 +733,9 @@ static unsigned int conf_ip4(unsigned int ifi, struct ip4_ctx *ip4,
>  	}
>  
>  	/* Apply default prefix_len to first address if not set */
> -	if (!ip4->addrs[0].prefix_len) {
> -		in_addr_t a = ntohl(ip4->addrs[0].addr.s_addr);
> -
> -		if (IN_CLASSA(a))
> -			ip4->addrs[0].prefix_len = 8;
> -		else if (IN_CLASSB(a))
> -			ip4->addrs[0].prefix_len = 16;
> -		else if (IN_CLASSC(a))
> -			ip4->addrs[0].prefix_len = 24;
> -		else
> -			ip4->addrs[0].prefix_len = 32;
> -	}
> +	if (!ip4->addrs[0].prefix_len)
> +		ip4->addrs[0].prefix_len =
> +			ip4_default_prefix_len(&ip4->addrs[0].addr);
>  
>  	ip4->addr_seen = ip4->addrs[0].addr;
>  
> diff --git a/ip.c b/ip.c
> index 9a7f4c5..2519c71 100644
> --- a/ip.c
> +++ b/ip.c
> @@ -13,6 +13,8 @@
>   */
>  
>  #include <stddef.h>
> +#include <netinet/in.h>
> +
>  #include "util.h"
>  #include "ip.h"
>  
> @@ -67,3 +69,22 @@ found:
>  	*proto = nh;
>  	return true;
>  }
> +
> +/**
> + * ip4_default_prefix_len() - Get default prefix length for IPv4 address
> + * @addr:	IPv4 address
> + *
> + * Return: prefix length based on address class (8/16/24), or 32 for other
> + */
> +int ip4_default_prefix_len(const struct in_addr *addr)
> +{
> +	in_addr_t a = ntohl(addr->s_addr);
> +
> +	if (IN_CLASSA(a))
> +		return 8;
> +	if (IN_CLASSB(a))
> +		return 16;
> +	if (IN_CLASSC(a))
> +		return 24;
> +	return 32;
> +}
> diff --git a/ip.h b/ip.h
> index 748cb1f..065b78b 100644
> --- a/ip.h
> +++ b/ip.h
> @@ -139,6 +139,8 @@ static const struct in_addr in4addr_broadcast = { 0xffffffff };
>  #define IP4_MAX_ADDRS		8
>  #define IP6_MAX_ADDRS		16
>  
> +int ip4_default_prefix_len(const struct in_addr *addr);
> +
>  /**
>   * struct ip4_addr_entry - IPv4 address with prefix length
>   * @addr:		IPv4 address
> -- 
> 2.51.1
> 

-- 
David Gibson (he or they)	| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you, not the other way
				| around.
http://www.ozlabs.org/~dgibson

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [RFC  01/12] ip: Introduce multi-address data structures for IPv4 and IPv6
  2025-12-15  1:54 ` [RFC 01/12] ip: Introduce multi-address data structures for IPv4 and IPv6 Jon Maloy
  2025-12-15  9:40   ` David Gibson
@ 2025-12-15  9:46   ` David Gibson
  1 sibling, 0 replies; 25+ messages in thread
From: David Gibson @ 2025-12-15  9:46 UTC (permalink / raw)
  To: Jon Maloy; +Cc: sbrivio, dgibson, passt-dev

[-- Attachment #1: Type: text/plain, Size: 1810 bytes --]

On Sun, Dec 14, 2025 at 08:54:30PM -0500, Jon Maloy wrote:
> As preparation for supporting multiple addresses per interface, we
> replace the single addr/prefix_len fields with arrays.
> 
> - We add an ip4_addr_entry and an ip6_addr_entry struct containing
>   address and prefix length.
> 
> - We set the array sizes to IP4_MAX_ADDRS=8 and IP6_MAX_ADDRS=16,
>   respectively.
> 
> The only functional change is that the IPv6 prefix length now is
> properly stored instead of being hardcoded to 64 even when set
> via the -a option.
> 
> Signed-off-by: Jon Maloy <jmaloy@redhat.com>
> ---
>  arp.c    |  4 +--
>  conf.c   | 97 +++++++++++++++++++++++++++++++++-----------------------
>  dhcp.c   |  8 ++---
>  dhcpv6.c |  6 ++--
>  fwd.c    | 12 +++----
>  ip.h     | 26 +++++++++++++++
>  ndp.c    |  6 ++--
>  passt.h  | 16 ++++++----
>  pasta.c  | 12 ++++---
>  tap.c    |  4 +--
>  util.h   |  1 +
>  11 files changed, 122 insertions(+), 70 deletions(-)
> 
> diff --git a/arp.c b/arp.c
> index bb042e9..7eaf517 100644
> --- a/arp.c
> +++ b/arp.c
> @@ -54,7 +54,7 @@ static bool ignore_arp(const struct ctx *c,
>  		return true;
>  
>  	/* Don't resolve the guest's assigned address, either. */
> -	if (!memcmp(am->tip, &c->ip4.addr, sizeof(am->tip)))
> +	if (!memcmp(am->tip, &c->ip4.addrs[0].addr, sizeof(am->tip)))
>  		return true;

Sorry, missed this on my first pass.  This needs to skip *all* of the
guest addresses, not just the first one.  Otherwise passt will respond
to arps for the guest's other addresses with its own MAC which will
break everything.

-- 
David Gibson (he or they)	| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you, not the other way
				| around.
http://www.ozlabs.org/~dgibson

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [RFC  03/12] conf: Allow multiple -a/--address options per address family
  2025-12-15  1:54 ` [RFC 03/12] conf: Allow multiple -a/--address options per address family Jon Maloy
@ 2025-12-15  9:53   ` David Gibson
  0 siblings, 0 replies; 25+ messages in thread
From: David Gibson @ 2025-12-15  9:53 UTC (permalink / raw)
  To: Jon Maloy; +Cc: sbrivio, dgibson, passt-dev

[-- Attachment #1: Type: text/plain, Size: 9324 bytes --]

On Sun, Dec 14, 2025 at 08:54:32PM -0500, Jon Maloy wrote:
> We enable configuration of multiple IPv4 and IPv6 addresses by allowing
> repeated use of the -a/--address option.
> 
> - We update option parsing to append addresses to the addrs[] array.
> 
> - Each address specified via -a does initially get a class-based default
>   prefix.
> 
> - If no -a option is given, address and prefix are inherited from
>   the template interface, just like now.
> 
> - The -n/--netmask option applies only to the first address, in addrs[0].

I know this is temporary, but this seems a really bad semantic.
Might be worth merging this patch with the -n patch.

> - We configure all indicated addresses in the namespace interface.
> 
> Signed-off-by: Jon Maloy <jmaloy@redhat.com>
> ---
>  conf.c  | 74 ++++++++++++++++++++++++++++++++++++++-------------------
>  pasta.c | 24 ++++++++++++++-----
>  2 files changed, 68 insertions(+), 30 deletions(-)
> 
> diff --git a/conf.c b/conf.c
> index 31acc20..e9f217b 100644
> --- a/conf.c
> +++ b/conf.c
> @@ -741,7 +741,7 @@ static unsigned int conf_ip4(unsigned int ifi, struct ip4_ctx *ip4,
>  
>  	ip4->our_tap_addr = ip4->guest_gw;
>  
> -	if (IN4_IS_ADDR_UNSPECIFIED(&ip4->addrs[0].addr))
> +	if (!ip4->addr_count)
>  		return 0;
>  
>  	return ifi;
> @@ -756,6 +756,7 @@ static void conf_ip4_local(struct ip4_ctx *ip4)
>  	ip4->addr_seen = ip4->addrs[0].addr = IP4_LL_GUEST_ADDR;
>  	ip4->our_tap_addr = ip4->guest_gw = IP4_LL_GUEST_GW;
>  	ip4->addrs[0].prefix_len = IP4_LL_PREFIX_LEN;
> +	ip4->addr_count = 1;

Doesn't this belong in an earlier patch?

Also, kind of pre-existing, but I don't think conf_ip4_local() should
overwrite an address specified with -a.  I think we want to change
that to either be skipped if -a is used, or to add the link local
address to the ones given explicitly.

>  
>  	ip4->no_copy_addrs = ip4->no_copy_routes = true;
>  }
> @@ -810,8 +811,7 @@ static unsigned int conf_ip6(unsigned int ifi, struct ip6_ctx *ip6,
>  	if (IN6_IS_ADDR_LINKLOCAL(&ip6->guest_gw))
>  		ip6->our_tap_ll = ip6->guest_gw;
>  
> -	if (IN6_IS_ADDR_UNSPECIFIED(&ip6->addrs[0].addr) ||
> -	    IN6_IS_ADDR_UNSPECIFIED(&ip6->our_tap_ll))
> +	if (!ip6->addr_count || IN6_IS_ADDR_UNSPECIFIED(&ip6->our_tap_ll))
>  		return 0;
>  
>  	return ifi;
> @@ -903,9 +903,11 @@ static void usage(const char *name, FILE *f, int status)
>  		"    default: 65520: maximum 802.3 MTU minus 802.3 header\n"
>  		"                    length, rounded to 32 bits (IPv4 words)\n"
>  		"  -a, --address ADDR	Assign IPv4 or IPv6 address ADDR\n"
> -		"    can be specified zero to two times (for IPv4 and IPv6)\n"
> +		"    can be specified multiple times (limit: %d IPv4, %d IPv6)\n"

man page might need an update too.

>  		"    default: use addresses from interface with default route\n"
> -		"  -n, --netmask MASK	Assign IPv4 MASK, dot-decimal or bits\n"
> +		"  -n, --netmask MASK	Assign IPv4 MASK, dot-decimal or bits\n",
> +		IP4_MAX_ADDRS, IP6_MAX_ADDRS);
> +	FPRINTF(f,
>  		"    default: netmask from matching address on the host\n"
>  		"  -M, --mac-addr ADDR	Use source MAC address ADDR\n"
>  		"    default: 9a:55:9a:55:9a:55 (locally administered)\n"
> @@ -1159,9 +1161,11 @@ static void conf_print(const struct ctx *c)
>  				     (32 - c->ip4.addrs[0].prefix_len));
>  
>  			info("DHCP:");
> -			info("    assign: %s",
> -			     inet_ntop(AF_INET, &c->ip4.addrs[0].addr,
> -				       buf4, sizeof(buf4)));
> +			for (i = 0; i < (int)c->ip4.addr_count; i++) {
> +				info("    assign: %s",
> +				     inet_ntop(AF_INET, &c->ip4.addrs[i].addr,
> +					       buf4, sizeof(buf4)));
> +			}

This is misleading.  We allow multiple addresses, but (at least as of
this patch) DHCP will only assign one of them.  In fact I'm not sure
we can assign multiple addresses with DHCP, short of rarely-supported
extensions.

>  			info("    mask: %s",
>  			     inet_ntop(AF_INET, &mask,        buf4, sizeof(buf4)));
>  			info("    router: %s",
> @@ -1198,9 +1202,11 @@ static void conf_print(const struct ctx *c)
>  		else
>  			goto dns6;
>  
> -		info("    assign: %s",
> -		     inet_ntop(AF_INET6, &c->ip6.addrs[0].addr,
> -			       buf6, sizeof(buf6)));
> +		for (i = 0; i < (int)c->ip6.addr_count; i++) {
> +			info("    assign: %s",
> +			     inet_ntop(AF_INET6, &c->ip6.addrs[i].addr,
> +				       buf6, sizeof(buf6)));
> +		}

Similar comments, though I think DHCPv6 can assign multiple addresses
(it's still not implemented as of this patch, though).

>  		info("    router: %s",
>  		     inet_ntop(AF_INET6, &c->ip6.guest_gw, buf6, sizeof(buf6)));
>  		info("    our link-local: %s",
> @@ -1517,6 +1523,8 @@ void conf(struct ctx *c, int argc, char **argv)
>  	struct fqdn *dnss = c->dns_search;
>  	unsigned int ifi4 = 0, ifi6 = 0;
>  	const char *logfile = NULL;
> +	struct in6_addr addr6;
> +	struct in_addr addr4;
>  	size_t logsize = 0;
>  	char *runas = NULL;
>  	long fd_tap_opt;
> @@ -1821,23 +1829,41 @@ void conf(struct ctx *c, int argc, char **argv)
>  			break;
>  		}
>  		case 'a':
> -			if (inet_pton(AF_INET6, optarg,
> -				      &c->ip6.addrs[0].addr) &&
> -			    !IN6_IS_ADDR_UNSPECIFIED(&c->ip6.addrs[0].addr) &&
> -			    !IN6_IS_ADDR_LOOPBACK(&c->ip6.addrs[0].addr) &&
> -			    !IN6_IS_ADDR_V4MAPPED(&c->ip6.addrs[0].addr) &&
> -			    !IN6_IS_ADDR_V4COMPAT(&c->ip6.addrs[0].addr) &&
> -			    !IN6_IS_ADDR_MULTICAST(&c->ip6.addrs[0].addr)) {
> +			if (inet_pton(AF_INET6, optarg, &addr6) &&
> +			    !IN6_IS_ADDR_UNSPECIFIED(&addr6) &&
> +			    !IN6_IS_ADDR_LOOPBACK(&addr6) &&
> +			    !IN6_IS_ADDR_V4MAPPED(&addr6) &&
> +			    !IN6_IS_ADDR_V4COMPAT(&addr6) &&
> +			    !IN6_IS_ADDR_MULTICAST(&addr6)) {
> +				unsigned int i = c->ip6.addr_count;
> +
> +				if (i >= IP6_MAX_ADDRS)
> +					die("Too many IPv6 addresses");
> +
> +				c->ip6.addrs[i].addr = addr6;
> +				c->ip6.addrs[i].prefix_len = 64;
> +				c->ip6.addrs[i].permanent = true;
> +				c->ip6.addr_count++;
>  				if (c->mode == MODE_PASTA)
>  					c->ip6.no_copy_addrs = true;
>  				break;
>  			}
>  
> -			if (inet_pton(AF_INET, optarg, &c->ip4.addrs[0].addr) &&
> -			    !IN4_IS_ADDR_UNSPECIFIED(&c->ip4.addrs[0].addr) &&
> -			    !IN4_IS_ADDR_BROADCAST(&c->ip4.addrs[0].addr) &&
> -			    !IN4_IS_ADDR_LOOPBACK(&c->ip4.addrs[0].addr) &&
> -			    !IN4_IS_ADDR_MULTICAST(&c->ip4.addrs[0].addr)) {
> +			if (inet_pton(AF_INET, optarg, &addr4) &&
> +			    !IN4_IS_ADDR_UNSPECIFIED(&addr4) &&
> +			    !IN4_IS_ADDR_BROADCAST(&addr4) &&
> +			    !IN4_IS_ADDR_LOOPBACK(&addr4) &&
> +			    !IN4_IS_ADDR_MULTICAST(&addr4)) {
> +				unsigned int i = c->ip4.addr_count;
> +
> +				if (i >= IP4_MAX_ADDRS)
> +					die("Too many IPv4 addresses");
> +
> +				c->ip4.addrs[i].addr = addr4;
> +				c->ip4.addrs[i].prefix_len =
> +					ip4_default_prefix_len(&addr4);
> +				c->ip4.addrs[i].permanent = true;
> +				c->ip4.addr_count++;
>  				if (c->mode == MODE_PASTA)
>  					c->ip4.no_copy_addrs = true;
>  				break;
> @@ -2135,7 +2161,7 @@ void conf(struct ctx *c, int argc, char **argv)
>  	if (!c->ifi6) {
>  		c->no_ndp = 1;
>  		c->no_dhcpv6 = 1;
> -	} else if (IN6_IS_ADDR_UNSPECIFIED(&c->ip6.addrs[0].addr)) {
> +	} else if (!c->ip6.addr_count) {
>  		c->no_dhcpv6 = 1;
>  	}
>  
> diff --git a/pasta.c b/pasta.c
> index 49b393c..fe2908f 100644
> --- a/pasta.c
> +++ b/pasta.c
> @@ -329,10 +329,16 @@ void pasta_ns_conf(struct ctx *c)
>  
>  		if (c->ifi4) {
>  			if (c->ip4.no_copy_addrs) {
> -				rc = nl_addr_set(nl_sock_ns, c->pasta_ifi,
> -						 AF_INET,
> -						 &c->ip4.addrs[0].addr,
> -						 c->ip4.addrs[0].prefix_len);
> +				int i;
> +
> +				for (i = 0; i < c->ip4.addr_count; i++) {
> +					rc = nl_addr_set(nl_sock_ns,
> +							 c->pasta_ifi, AF_INET,
> +							 &c->ip4.addrs[i].addr,
> +							 c->ip4.addrs[i].prefix_len);
> +					if (rc < 0)
> +						break;
> +				}

Maybe it belongs in a different patch, but multiple address support
might let us partially unify the "copy addrs" and "no_copy_addrs"
paths.  Rather than copying addresses direct from the host into the
guest, we optionally add host addresses into the address list, then
unconditionally set everything in the list in the guest.

>  			} else {
>  				rc = nl_addr_dup(nl_sock, c->ifi4,
>  						 nl_sock_ns, c->pasta_ifi,
> @@ -378,12 +384,18 @@ void pasta_ns_conf(struct ctx *c)
>  					  0, IFF_NOARP);
>  
>  			if (c->ip6.no_copy_addrs) {
> -				struct in6_addr *a = &c->ip6.addrs[0].addr;
> +				struct in6_addr *a;
> +				int i;
>  
> -				if (!IN6_IS_ADDR_UNSPECIFIED(a)) {
> +				for (i = 0; i < c->ip6.addr_count; i++) {
> +					a = &c->ip6.addrs[i].addr;
> +					if (IN6_IS_ADDR_UNSPECIFIED(a))
> +						continue;
>  					rc = nl_addr_set(nl_sock_ns,
>  							 c->pasta_ifi,
>  							 AF_INET6, a, 64);
> +					if (rc < 0)
> +						break;
>  				}
>  			} else {
>  				rc = nl_addr_dup(nl_sock, c->ifi6,
> -- 
> 2.51.1
> 

-- 
David Gibson (he or they)	| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you, not the other way
				| around.
http://www.ozlabs.org/~dgibson

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [RFC  04/12] conf: Apply -n/--netmask to most recently added address
  2025-12-15  1:54 ` [RFC 04/12] conf: Apply -n/--netmask to most recently added address Jon Maloy
@ 2025-12-15  9:54   ` David Gibson
  2025-12-15 22:43     ` Jon Maloy
  0 siblings, 1 reply; 25+ messages in thread
From: David Gibson @ 2025-12-15  9:54 UTC (permalink / raw)
  To: Jon Maloy; +Cc: sbrivio, dgibson, passt-dev

[-- Attachment #1: Type: text/plain, Size: 2086 bytes --]

On Sun, Dec 14, 2025 at 08:54:33PM -0500, Jon Maloy wrote:
> We allow for multiple -n/--netmask options, and update the handling so
> that each given prefix length is applied to the most recently added
> address instead of always to addrs[0].
> 
> This allows per-address prefix configuration, such as:
> 
>     -a 10.0.0.1 -n 24 -a 10.0.0.2 -n 16
> 
> If no address has been added yet, -n still applies to addrs[0] for
> backwards compatibility.

Huh.  I'd forgotten until looking at this series that we had the -n
option.  We probably do need this change, but in general I think we'd
be better off deprecating -n, in favour of allowing a prefix len as
part of the -a option, e.g.

	-a 10.0.0.1/24 -a 10.0.0.2/16

That could be a smaller change preliminary to this series.

> 
> Signed-off-by: Jon Maloy <jmaloy@redhat.com>
> ---
>  conf.c | 9 +++++++--
>  1 file changed, 7 insertions(+), 2 deletions(-)
> 
> diff --git a/conf.c b/conf.c
> index e9f217b..0a4a28a 100644
> --- a/conf.c
> +++ b/conf.c
> @@ -1528,6 +1528,7 @@ void conf(struct ctx *c, int argc, char **argv)
>  	size_t logsize = 0;
>  	char *runas = NULL;
>  	long fd_tap_opt;
> +	int prefix, idx;
>  	int name, ret;
>  	uid_t uid;
>  	gid_t gid;
> @@ -1872,10 +1873,14 @@ void conf(struct ctx *c, int argc, char **argv)
>  			die("Invalid address: %s", optarg);
>  			break;
>  		case 'n':
> -			c->ip4.addrs[0].prefix_len = conf_ip4_prefix(optarg);
> -			if (c->ip4.addrs[0].prefix_len < 0)
> +			prefix = conf_ip4_prefix(optarg);
> +
> +			if (prefix < 0)
>  				die("Invalid netmask: %s", optarg);
>  
> +			/* Apply to most recent address, or addrs[0] if none yet */
> +			idx = c->ip4.addr_count ? c->ip4.addr_count - 1 : 0;
> +			c->ip4.addrs[idx].prefix_len = prefix;
>  			break;
>  		case 'M':
>  			parse_mac(c->our_tap_mac, optarg);
> -- 
> 2.51.1
> 

-- 
David Gibson (he or they)	| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you, not the other way
				| around.
http://www.ozlabs.org/~dgibson

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [RFC  05/12] fwd: Check all configured addresses in guest accessibility functions
  2025-12-15  1:54 ` [RFC 05/12] fwd: Check all configured addresses in guest accessibility functions Jon Maloy
@ 2025-12-15 10:06   ` David Gibson
  0 siblings, 0 replies; 25+ messages in thread
From: David Gibson @ 2025-12-15 10:06 UTC (permalink / raw)
  To: Jon Maloy; +Cc: sbrivio, dgibson, passt-dev

[-- Attachment #1: Type: text/plain, Size: 3682 bytes --]

On Sun, Dec 14, 2025 at 08:54:34PM -0500, Jon Maloy wrote:
> We update fwd_guest_accessible4() and fwd_guest_accessible6() to check
> against all addresses in the addrs[] array, not just addrs[0].
> 
> This ensures that when multiple addresses are configured via -a options,
> traffic using any of them is correctly identified as guest traffic for
> NAT and forwarding decisions.

That last paragraph is not accurate.  fwd_guest_accessible() isn't
about "identifying guest traffic".  It's about detecting inbound
traffic that we have *no way* to forward to the guest and dropping it.
This occurs when we have a peer address that we have no translation
for, but collides with an address the guest is using.

> 
> Signed-off-by: Jon Maloy <jmaloy@redhat.com>
> ---
>  fwd.c | 22 ++++++++++++++++------
>  1 file changed, 16 insertions(+), 6 deletions(-)
> 
> diff --git a/fwd.c b/fwd.c
> index 408af30..ece381d 100644
> --- a/fwd.c
> +++ b/fwd.c
> @@ -502,6 +502,8 @@ static bool is_dns_flow(uint8_t proto, const struct flowside *ini)
>  static bool fwd_guest_accessible4(const struct ctx *c,
>  				    const struct in_addr *addr)
>  {
> +	int i;
> +
>  	if (IN4_IS_ADDR_LOOPBACK(addr))
>  		return false;
>  
> @@ -513,11 +515,15 @@ static bool fwd_guest_accessible4(const struct ctx *c,
>  	if (IN4_IS_ADDR_UNSPECIFIED(addr))
>  		return false;
>  
> -	/* For IPv4, addr_seen is initialised to addr, so is always a valid
> -	 * address
> +	/* Check against all configured guest addresses */
> +	for (i = 0; i < c->ip4.addr_count; i++)
> +		if (IN4_ARE_ADDR_EQUAL(addr, &c->ip4.addrs[i].addr))
> +			return false;
> +
> +	/* Also check addr_seen: it tracks the address the guest is actually
> +	 * using, which may differ from configured addresses.
>  	 */
> -	if (IN4_ARE_ADDR_EQUAL(addr, &c->ip4.addrs[0].addr) ||
> -	    IN4_ARE_ADDR_EQUAL(addr, &c->ip4.addr_seen))
> +	if (IN4_ARE_ADDR_EQUAL(addr, &c->ip4.addr_seen))

Really an overall series comment, rather than specific to this patch:

If we're allowing multiple addresses, it doesn't make sense to leave
the 'addr_seen' mechanism as-is.  If the guest actually uses multiple
addresses, then addr_seen will bounce around between them in a not
very meaningful way.

Personally, I've never been super-convinced that allowing the guest to
just use an arbitrary address we didn't give it is a good idea.  But,
I guess it's an established feature now.  I think the way to do that
in a multi-address environment would be to add addresses we observe
the guest using to the list of addresses.  They should probably be
flagged as having been observed coming from the guest, rather than
coming from either -a or the host.

>  		return false;
>  
>  	return true;
> @@ -534,11 +540,15 @@ static bool fwd_guest_accessible4(const struct ctx *c,
>  static bool fwd_guest_accessible6(const struct ctx *c,
>  				  const struct in6_addr *addr)
>  {
> +	int i;
> +
>  	if (IN6_IS_ADDR_LOOPBACK(addr))
>  		return false;
>  
> -	if (IN6_ARE_ADDR_EQUAL(addr, &c->ip6.addrs[0].addr))
> -		return false;
> +	/* Check against all configured guest addresses */
> +	for (i = 0; i < c->ip6.addr_count; i++)
> +		if (IN6_ARE_ADDR_EQUAL(addr, &c->ip6.addrs[i].addr))
> +			return false;
>  
>  	/* For IPv6, addr_seen starts unspecified, because we don't know what LL
>  	 * address the guest will take until we see it.  Only check against it
> -- 
> 2.51.1
> 

-- 
David Gibson (he or they)	| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you, not the other way
				| around.
http://www.ozlabs.org/~dgibson

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [RFC  06/12] arp: Check all configured addresses in ARP filtering
  2025-12-15  1:54 ` [RFC 06/12] arp: Check all configured addresses in ARP filtering Jon Maloy
@ 2025-12-15 10:07   ` David Gibson
  0 siblings, 0 replies; 25+ messages in thread
From: David Gibson @ 2025-12-15 10:07 UTC (permalink / raw)
  To: Jon Maloy; +Cc: sbrivio, dgibson, passt-dev

[-- Attachment #1: Type: text/plain, Size: 1665 bytes --]

On Sun, Dec 14, 2025 at 08:54:35PM -0500, Jon Maloy wrote:
> We update ignore_arp() to check against all addresses in the addrs[]
> array, not just addrs[0]. This ensures ARP requests for any of the
> guest's configured addresses are properly ignored.
> 
> Signed-off-by: Jon Maloy <jmaloy@redhat.com>
> ---
>  arp.c | 9 ++++++---
>  1 file changed, 6 insertions(+), 3 deletions(-)
> 
> diff --git a/arp.c b/arp.c
> index 7eaf517..61c309e 100644
> --- a/arp.c
> +++ b/arp.c
> @@ -41,6 +41,8 @@
>  static bool ignore_arp(const struct ctx *c,
>  		       const struct arphdr *ah, const struct arpmsg *am)
>  {
> +	int i;
> +
>  	if (ah->ar_hrd != htons(ARPHRD_ETHER)	||
>  	    ah->ar_pro != htons(ETH_P_IP)	||
>  	    ah->ar_hln != ETH_ALEN		||
> @@ -53,9 +55,10 @@ static bool ignore_arp(const struct ctx *c,
>  	    !memcmp(am->sip, am->tip, sizeof(am->sip)))
>  		return true;
>  
> -	/* Don't resolve the guest's assigned address, either. */
> -	if (!memcmp(am->tip, &c->ip4.addrs[0].addr, sizeof(am->tip)))
> -		return true;
> +	/* Don't resolve any of the guest's assigned addresses, either */
> +	for (i = 0; i < c->ip4.addr_count; i++)
> +		if (!memcmp(am->tip, &c->ip4.addrs[i].addr, sizeof(am->tip)))
> +			return true;

Ah, right.  I think this needs to move earlier in the series,
otherwise things are broken in between allowing multiple addresses and
this change.

>  
>  	return false;
>  }
> -- 
> 2.51.1
> 

-- 
David Gibson (he or they)	| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you, not the other way
				| around.
http://www.ozlabs.org/~dgibson

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [RFC  07/12] netlink: Subscribe to link/address changes in namespace
  2025-12-15  1:54 ` [RFC 07/12] netlink: Subscribe to link/address changes in namespace Jon Maloy
@ 2025-12-15 10:32   ` David Gibson
  2025-12-15 23:25     ` Jon Maloy
  0 siblings, 1 reply; 25+ messages in thread
From: David Gibson @ 2025-12-15 10:32 UTC (permalink / raw)
  To: Jon Maloy; +Cc: sbrivio, dgibson, passt-dev

[-- Attachment #1: Type: text/plain, Size: 16979 bytes --]

On Sun, Dec 14, 2025 at 08:54:36PM -0500, Jon Maloy wrote:
> We add subscriptions to RTMGRP_LINK, RTMGRP_IPV4_IFADDR, and
> RTMGRP_IPV6_IFADDR, so that we can receive notifications when link
> state or addresses change on the namespace interface.
> 
> When addresses are discovered via netlink:
> 
> - We mark them as non-permanent, which means they can be modified or
>   deleted by subsequent events.
> - We apply the prefix indicated in the notification.
> - Update addr_seen to track the new address as the active one.

addr_seen isn't really about an "active" address.  The expectation was
that the guest would only use a single address, it just might not be
the one we told it to.

Now that we're aiming to allow multiple concurrent addresses, we can
expect the guest to use all of them actively.

> This provides the foundation for dynamic address monitoring,
> and supports runtime network changes.
> 
> Signed-off-by: Jon Maloy <jmaloy@redhat.com>
> ---
>  epoll_type.h |   2 +
>  netlink.c    | 370 +++++++++++++++++++++++++++++++++++++++++++++++++++
>  netlink.h    |   3 +
>  passt.c      |   5 +
>  passt.h      |   1 +
>  tap.c        |   6 +-
>  6 files changed, 384 insertions(+), 3 deletions(-)
> 
> diff --git a/epoll_type.h b/epoll_type.h
> index a90ffb6..0a16d94 100644
> --- a/epoll_type.h
> +++ b/epoll_type.h
> @@ -46,6 +46,8 @@ enum epoll_type {
>  	EPOLL_TYPE_REPAIR,
>  	/* Netlink neighbour subscription socket */
>  	EPOLL_TYPE_NL_NEIGH,
> +	/* Netlink link/address subscription socket */
> +	EPOLL_TYPE_NL_LINKADDR,
>  
>  	EPOLL_NUM_TYPES,
>  };
> diff --git a/netlink.c b/netlink.c
> index 82a2f0c..7492f17 100644
> --- a/netlink.c
> +++ b/netlink.c
> @@ -35,6 +35,9 @@
>  #include "passt.h"
>  #include "log.h"
>  #include "ip.h"
> +#include "tap.h"
> +#include "arp.h"
> +#include "ndp.h"
>  #include "netlink.h"
>  #include "epoll_ctl.h"
>  
> @@ -59,6 +62,7 @@
>  int nl_sock		 = -1;
>  int nl_sock_ns		 = -1;
>  static int nl_sock_neigh = -1;
> +static int nl_sock_linkaddr = -1;
>  static int nl_seq	 = 1;
>  
>  /**
> @@ -91,6 +95,372 @@ static int nl_sock_init_do(void *arg)
>  	return 0;
>  }
>  
> +/**
> + * nl_addr4_find() - Find an IPv4 address in the address array
> + * @c:		Execution context
> + * @addr:	Address to find
> + *
> + * Return: index if found, -1 otherwise
> + */
> +static int nl_addr4_find(const struct ctx *c, const struct in_addr *addr)
> +{
> +	int i;
> +
> +	for (i = 0; i < c->ip4.addr_count; i++)
> +		if (IN4_ARE_ADDR_EQUAL(&c->ip4.addrs[i].addr, addr))
> +			return (int)i;
> +
> +	return -1;
> +}
> +
> +/**
> + * nl_addr6_find() - Find an IPv6 address in the address array
> + * @c:		Execution context
> + * @addr:	Address to find
> + *
> + * Return: index if found, -1 otherwise
> + */
> +static int nl_addr6_find(const struct ctx *c, const struct in6_addr *addr)
> +{
> +	int i;
> +
> +	for (i = 0; i < c->ip6.addr_count; i++)
> +		if (IN6_ARE_ADDR_EQUAL(&c->ip6.addrs[i].addr, addr))
> +			return (int)i;
> +
> +	return -1;
> +}
> +
> +/**
> + * nl_addr4_add() - Add a discovered IPv4 address to the address array
> + * @c:		Execution context
> + * @addr:	Address to add
> + * @prefix_len: Prefix length
> + *
> + * Return: true if added or updated, false if array full or already permanent
> + */
> +static bool nl_addr4_add(struct ctx *c, const struct in_addr *addr,
> +			 int prefix_len)
> +{
> +	int idx = nl_addr4_find(c, addr);
> +
> +	if (idx >= 0) {
> +		/* Address exists - if permanent, don't touch; else update */
> +		if (c->ip4.addrs[idx].permanent)
> +			return false;
> +		c->ip4.addrs[idx].prefix_len = prefix_len;
> +		return true;
> +	}
> +
> +	/* New address - add if room */
> +	if (c->ip4.addr_count >= IP4_MAX_ADDRS) {
> +		debug("IPv4 address array full, ignoring discovered address");
> +		return false;
> +	}
> +
> +	idx = c->ip4.addr_count++;
> +	c->ip4.addrs[idx].addr = *addr;
> +	c->ip4.addrs[idx].prefix_len = prefix_len;
> +	c->ip4.addrs[idx].permanent = 0;
> +	return true;
> +}
> +
> +/**
> + * nl_addr6_add() - Add a discovered IPv6 address to the address array
> + * @c:		Execution context
> + * @addr:	Address to add
> + * @prefix_len: Prefix length
> + *
> + * Return: true if added or updated, false if array full or already permanent
> + */
> +static bool nl_addr6_add(struct ctx *c, const struct in6_addr *addr,
> +			 int prefix_len)
> +{
> +	int idx = nl_addr6_find(c, addr);
> +
> +	if (idx >= 0) {
> +		/* Address exists - if permanent, don't touch; else update */
> +		if (c->ip6.addrs[idx].permanent)
> +			return false;
> +		c->ip6.addrs[idx].prefix_len = prefix_len;
> +		return true;
> +	}
> +
> +	/* New address - add if room */
> +	if (c->ip6.addr_count >= IP6_MAX_ADDRS) {
> +		debug("IPv6 address array full, ignoring discovered address");
> +		return false;
> +	}
> +
> +	idx = c->ip6.addr_count++;
> +	c->ip6.addrs[idx].addr = *addr;
> +	c->ip6.addrs[idx].prefix_len = prefix_len;
> +	c->ip6.addrs[idx].permanent = 0;
> +	return true;
> +}
> +
> +/**
> + * nl_addr4_del() - Remove an IPv4 address from the array if not permanent
> + * @c:		Execution context
> + * @addr:	Address to remove
> + *
> + * Return: true if removed, false if not found or permanent
> + */
> +static bool nl_addr4_del(struct ctx *c, const struct in_addr *addr)
> +{
> +	int i, idx = nl_addr4_find(c, addr);
> +
> +	if (idx < 0)
> +		return false;
> +
> +	if (c->ip4.addrs[idx].permanent)
> +		return false;
> +
> +	/* Shift remaining entries down */
> +	c->ip4.addr_count--;
> +	for (i = idx; i < c->ip4.addr_count; i++)
> +		c->ip4.addrs[i] = c->ip4.addrs[i + 1];
> +
> +	return true;
> +}
> +
> +/**
> + * nl_addr6_del() - Remove an IPv6 address from the array if not permanent
> + * @c:		Execution context
> + * @addr:	Address to remove
> + *
> + * Return: true if removed, false if not found or permanent
> + */
> +static bool nl_addr6_del(struct ctx *c, const struct in6_addr *addr)
> +{
> +	int i, idx = nl_addr6_find(c, addr);
> +
> +	if (idx < 0)
> +		return false;
> +
> +	if (c->ip6.addrs[idx].permanent)
> +		return false;
> +
> +	/* Shift remaining entries down */
> +	c->ip6.addr_count--;
> +	for (i = idx; i < c->ip6.addr_count; i++)
> +		c->ip6.addrs[i] = c->ip6.addrs[i + 1];
> +
> +	return true;
> +}

All the functions above are more to do with the data structure storing
the addresses than they are to do with netlink.  Better to move them
into... maybe ip.c?  And use them from conf.c as well.

Given the amount of near-duplication here, maybe it would be better to
have a single table for v4 and v6 using inany_addr?

> +/**
> + * nl_linkaddr_msg_read() - Parse and log a netlink link/addr message
> + * @c:		Execution context
> + * @nh:	Netlink message header
> + */
> +static void nl_linkaddr_msg_read(struct ctx *c, const struct nlmsghdr *nh)
> +{
> +	if (nh->nlmsg_type == NLMSG_DONE || nh->nlmsg_type == NLMSG_ERROR)
> +		return;
> +
> +	if (nh->nlmsg_type == RTM_NEWLINK || nh->nlmsg_type == RTM_DELLINK) {
> +		const struct ifinfomsg *ifm = NLMSG_DATA(nh);
> +		struct rtattr *rta = IFLA_RTA(ifm);
> +		size_t na = IFLA_PAYLOAD(nh);
> +		const char *name = "?";
> +		bool up = !!(ifm->ifi_flags & IFF_UP);
> +		bool running = !!(ifm->ifi_flags & IFF_RUNNING);
> +
> +		for (; RTA_OK(rta, na); rta = RTA_NEXT(rta, na)) {
> +			if (rta->rta_type == IFLA_IFNAME) {
> +				name = (const char *)RTA_DATA(rta);
> +				break;
> +			}
> +		}
> +
> +		/* Update pasta interface UP state if this is our interface */
> +		if (c->mode == MODE_PASTA &&
> +		    (unsigned int)ifm->ifi_index == c->pasta_ifi) {
> +			c->pasta_ifi_up = up;
> +			debug("Interface %s", up ? "UP" : "DOWN");

This only makes sense if we're listening to netlink messages in the
guest netns, but the address stuff only makes sense listening to
messages in the host netns.

> +		}
> +
> +		if (nh->nlmsg_type == RTM_NEWLINK)
> +			debug("Link %s (idx=%d): %s %s", name, ifm->ifi_index,
> +			     up ? "UP" : "DOWN", running ? "RUNNING" : "");
> +		else
> +			debug("Link %s (idx=%d): DELETED", name, ifm->ifi_index);
> +
> +		return;
> +	}
> +
> +	if (nh->nlmsg_type == RTM_NEWADDR || nh->nlmsg_type == RTM_DELADDR) {
> +		bool is_new = (nh->nlmsg_type == RTM_NEWADDR);
> +		const struct ifaddrmsg *ifa = NLMSG_DATA(nh);
> +		char addr_str[INET6_ADDRSTRLEN];
> +		struct rtattr *rta = IFA_RTA(ifa);
> +		char ifname[IFNAMSIZ] = { 0 };
> +		size_t na = IFA_PAYLOAD(nh);
> +		void *addr = NULL;
> +		for (; RTA_OK(rta, na); rta = RTA_NEXT(rta, na)) {
> +			if (ifa->ifa_family == AF_INET &&
> +			    rta->rta_type == IFA_LOCAL) {
> +				addr = RTA_DATA(rta);
> +				break;
> +			} else if (ifa->ifa_family == AF_INET6 &&
> +				   rta->rta_type == IFA_ADDRESS) {
> +				addr = RTA_DATA(rta);
> +				break;
> +			}
> +		}
> +
> +		if (!addr)
> +			return;
> +
> +		if_indextoname(ifa->ifa_index, ifname);
> +		inet_ntop(ifa->ifa_family, addr, addr_str, sizeof(addr_str));
> +
> +		debug("%s addr on %s (index=%d): %s/%i%s",
> +		      is_new ? "NEW" : "DEL", ifname, ifa->ifa_index, addr_str,
> +		      ifa->ifa_prefixlen,
> +		      tap_is_ready(c) ? " (tap UP)" : " (tap DOWN)");
> +
> +		/* Only handle our pasta interface */
> +		if (c->mode != MODE_PASTA || ifa->ifa_index != c->pasta_ifi)
> +			return;

Nope.  This is a host netns event, so comparing to pasta_ifi makes no
sense.  We _should_ be comparing to ifi4 or ifi6 (depending on address
family), and we should probably do that before we go parsing the
details above.

We should also probably check for --no-copy-addrs here, too.

In the other direction, even for PASST mode we can store this address
in our table, it just won't do anything until DHCP or whatever
consults it.

> +
> +		if (ifa->ifa_family == AF_INET) {
> +			struct in_addr *a = (struct in_addr *)addr;
> +
> +			if (!is_new) {
> +				nl_addr4_del(c, a);
> +				return;
> +			}
> +
> +			if (nl_addr4_add(c, a, ifa->ifa_prefixlen)) {
> +				c->ip4.addr_seen = *a;
> +				if (c->pasta_ifi_up && c->ifi4) {
> +					debug("Sending ARP");
> +					arp_send_init_req(c);

What does this ARP request do?  AFAICT we haven't actually added the
address in the guest netns yet, so the guest won't respond to the ARP.

> +				}
> +			}
> +		} else if (ifa->ifa_family == AF_INET6) {
> +			struct in6_addr *a = (struct in6_addr *)addr;
> +
> +			if (!is_new) {
> +				nl_addr6_del(c, a);
> +				return;
> +			}
> +
> +			if (nl_addr6_add(c, a,
> +					 ifa->ifa_prefixlen)) {
> +				c->ip6.addr_seen = *a;
> +				if (c->pasta_ifi_up &&
> +				    c->ifi6 && !c->no_ndp) {
> +					debug("Sending NDP");
> +					ndp_send_init_req(c);

Same question with this NDP.

> +				}
> +			}
> +		}
> +	}
> +}
> +
> +/**
> + * nl_linkaddr_notify_handler() - Handle events from link/addr notifier socket
> + * @c:		Execution context
> + */
> +void nl_linkaddr_notify_handler(struct ctx *c)
> +{
> +	char buf[NLBUFSIZ];
> +
> +	for (;;) {
> +		ssize_t n = recv(nl_sock_linkaddr, buf, sizeof(buf), MSG_DONTWAIT);
> +		struct nlmsghdr *nh = (struct nlmsghdr *)buf;
> +
> +		if (n < 0) {
> +			if (errno == EINTR)
> +				continue;
> +			if (errno != EAGAIN)
> +				debug("recv() error: %s", strerror_(errno));
> +			break;
> +		}
> +
> +		debug("Received %zd bytes", n);
> +
> +		for (; NLMSG_OK(nh, n); nh = NLMSG_NEXT(nh, n))
> +			nl_linkaddr_msg_read(c, nh);
> +	}
> +}
> +
> +/**
> + * nl_linkaddr_init_do() - Actually create and bind the netlink socket
> + * @arg:	Execution context (for namespace entry) or NULL
> + *
> + * Return: 0 on success, -1 on failure
> + */
> +static int nl_linkaddr_init_do(void *arg)
> +{
> +	struct sockaddr_nl addr = { .nl_family = AF_NETLINK,
> +		.nl_groups = RTMGRP_LINK | RTMGRP_IPV4_IFADDR |
> +			     RTMGRP_IPV6_IFADDR };
> +
> +	if (arg)
> +		ns_enter((struct ctx *)arg);
> +
> +	nl_sock_linkaddr = socket(AF_NETLINK, SOCK_RAW | SOCK_CLOEXEC, NETLINK_ROUTE);

Is there a reason to use an additional socket, rather than adding more
events to the neighbour listening socket?

> +	if (nl_sock_linkaddr < 0) {
> +		debug("socket() failed: %s", strerror_(errno));
> +		return -1;
> +	}
> +
> +	if (bind(nl_sock_linkaddr, (struct sockaddr *)&addr, sizeof(addr)) < 0) {
> +		debug("bind() failed: %s", strerror_(errno));
> +		close(nl_sock_linkaddr);
> +		nl_sock_linkaddr = -1;
> +		return -1;
> +	}
> +
> +	debug("socket fd=%d", nl_sock_linkaddr);
> +	return 0;
> +}
> +
> +/**
> + * nl_linkaddr_notify_init() - Initialize link/address change notifier
> + * @c:		Execution context
> + *
> + * Return: 0 on success, -1 on failure
> + */
> +int nl_linkaddr_notify_init(const struct ctx *c)
> +{
> +	union epoll_ref ref = { .type = EPOLL_TYPE_NL_LINKADDR };
> +	struct epoll_event ev = { .events = EPOLLIN };
> +
> +	if (nl_sock_linkaddr >= 0) {
> +		debug("notifier already initialized (fd=%d)", nl_sock_linkaddr);
> +		return 0;
> +	}
> +
> +	/* Open the notifier socket in the namespace for pasta mode,
> +	 * or in the init namespace otherwise.

Definitely wrong.  We're trying to watch host addresses so that we can
copy them to the guest - therefore we always need to watch in the host
netns.  On pasta there might be reasons to *also* listen in the guest
netns, but that would want a different handler to do different things.

> +	 */
> +	if (c->mode == MODE_PASTA)
> +		NS_CALL(nl_linkaddr_init_do, (void *)c);
> +	else
> +		nl_linkaddr_init_do(NULL);
> +
> +	if (nl_sock_linkaddr < 0) {
> +		warn("Failed to create/bind link/addr notifier socket");
> +		return -1;
> +	}
> +
> +	ev.data.u64 = ref.u64;
> +	if (epoll_ctl(c->epollfd, EPOLL_CTL_ADD, nl_sock_linkaddr, &ev) == -1) {
> +		warn("epoll_ctl() failed on link/addr notifier socket: %s",
> +		     strerror_(errno));
> +		close(nl_sock_linkaddr);
> +		nl_sock_linkaddr = -1;
> +		return -1;
> +	}
> +
> +	return 0;
> +}
>  /**
>   * nl_sock_init() - Call nl_sock_init_do(), won't return on failure
>   * @c:		Execution context
> diff --git a/netlink.h b/netlink.h
> index 8f1e9b9..1796a72 100644
> --- a/netlink.h
> +++ b/netlink.h
> @@ -33,4 +33,7 @@ int nl_link_set_flags(int s, unsigned int ifi,
>  int nl_neigh_notify_init(const struct ctx *c);
>  void nl_neigh_notify_handler(const struct ctx *c);
>  
> +int nl_linkaddr_notify_init(const struct ctx *c);
> +void nl_linkaddr_notify_handler(struct ctx *c);
> +
>  #endif /* NETLINK_H */
> diff --git a/passt.c b/passt.c
> index 5ed88d0..f274858 100644
> --- a/passt.c
> +++ b/passt.c
> @@ -80,6 +80,7 @@ char *epoll_type_str[] = {
>  	[EPOLL_TYPE_REPAIR_LISTEN]	= "TCP_REPAIR helper listening socket",
>  	[EPOLL_TYPE_REPAIR]		= "TCP_REPAIR helper socket",
>  	[EPOLL_TYPE_NL_NEIGH]		= "netlink neighbour notifier socket",
> +	[EPOLL_TYPE_NL_LINKADDR]	= "netlink link/address notifier socket",
>  };
>  static_assert(ARRAY_SIZE(epoll_type_str) == EPOLL_NUM_TYPES,
>  	      "epoll_type_str[] doesn't match enum epoll_type");
> @@ -304,6 +305,9 @@ static void passt_worker(void *opaque, int nfds, struct epoll_event *events)
>  		case EPOLL_TYPE_NL_NEIGH:
>  			nl_neigh_notify_handler(c);
>  			break;
> +		case EPOLL_TYPE_NL_LINKADDR:
> +			nl_linkaddr_notify_handler(c);
> +			break;
>  		default:
>  			/* Can't happen */
>  			ASSERT(0);
> @@ -413,6 +417,7 @@ int main(int argc, char **argv)
>  
>  	fwd_neigh_table_init(&c);
>  	nl_neigh_notify_init(&c);
> +	nl_linkaddr_notify_init(&c);
>  
>  	if (!c.foreground) {
>  		if ((devnull_fd = open("/dev/null", O_RDWR | O_CLOEXEC)) < 0)
> diff --git a/passt.h b/passt.h
> index 533f2cb..70ccaf1 100644
> --- a/passt.h
> +++ b/passt.h
> @@ -264,6 +264,7 @@ struct ctx {
>  	char pasta_ifn[IF_NAMESIZE];
>  	unsigned int pasta_ifi;
>  	int pasta_conf_ns;
> +	int pasta_ifi_up;	/* Namespace interface is UP */
>  
>  	int no_tcp;
>  	struct tcp_ctx tcp;
> diff --git a/tap.c b/tap.c
> index 0b96cc1..a2a4459 100644
> --- a/tap.c
> +++ b/tap.c
> @@ -1363,10 +1363,10 @@ bool tap_is_ready(const struct ctx *c)
>  		return false;
>  
>  	if (c->mode == MODE_PASTA) {
> -		/* If pasta_conf_ns is set, the interface was configured and
> -		 * brought up during initialization. If not, it's still down.
> +		/* Check if the namespace interface is actually UP.
> +		 * This is tracked by netlink link notifications.
>  		 */
> -		return c->pasta_conf_ns;
> +		return c->pasta_ifi_up;
>  	}
>  
>  	return true;
> -- 
> 2.51.1
> 

-- 
David Gibson (he or they)	| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you, not the other way
				| around.
http://www.ozlabs.org/~dgibson

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [RFC  08/12] netlink: Subscribe to route changes in namespace
  2025-12-15  1:54 ` [RFC 08/12] netlink: Subscribe to route " Jon Maloy
@ 2025-12-15 10:38   ` David Gibson
  0 siblings, 0 replies; 25+ messages in thread
From: David Gibson @ 2025-12-15 10:38 UTC (permalink / raw)
  To: Jon Maloy; +Cc: sbrivio, dgibson, passt-dev

[-- Attachment #1: Type: text/plain, Size: 3600 bytes --]

On Sun, Dec 14, 2025 at 08:54:37PM -0500, Jon Maloy wrote:
> We add subscriptions to RTMGRP_IPV4_ROUTE and RTMGRP_IPV6_ROUTE, so
> that we receive notifications when routes change on the namespace
> interface.

No, we'd need to listen in the host netns, so we can transfer those
route changes to the guest netns.

> When default routes change on the pasta interface, we update guest_gw
> (and our_tap_addr for IPv4) to reflect the new gateway. This handles
> both routes propagated from the host and routes configured manually
> by the user inside the namespace.
> 
> Signed-off-by: Jon Maloy <jmaloy@redhat.com>
> ---
>  netlink.c | 49 ++++++++++++++++++++++++++++++++++++++++++++++---
>  1 file changed, 46 insertions(+), 3 deletions(-)
> 
> diff --git a/netlink.c b/netlink.c
> index 7492f17..a8d3116 100644
> --- a/netlink.c
> +++ b/netlink.c
> @@ -195,7 +195,7 @@ static bool nl_addr6_add(struct ctx *c, const struct in6_addr *addr,
>  	idx = c->ip6.addr_count++;
>  	c->ip6.addrs[idx].addr = *addr;
>  	c->ip6.addrs[idx].prefix_len = prefix_len;
> -	c->ip6.addrs[idx].permanent = 0;
> +	c->ip6.addrs[idxyes].permanent = 0;

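Um... what?  'idxyes' isn't declared anywhere in this function, so this
hunk looks like a stray edit that would break the build.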

>  	return true;
>  }
>  
> @@ -359,6 +359,49 @@ static void nl_linkaddr_msg_read(struct ctx *c, const struct nlmsghdr *nh)
>  				}
>  			}
>  		}
> +		return;
> +	}
> +
> +	if (nh->nlmsg_type == RTM_NEWROUTE || nh->nlmsg_type == RTM_DELROUTE) {
> +		bool is_new = (nh->nlmsg_type == RTM_NEWROUTE);
> +		const struct rtmsg *rtm = NLMSG_DATA(nh);
> +		struct rtattr *rta = RTM_RTA(rtm);
> +		size_t na = RTM_PAYLOAD(nh);
> +		unsigned int oif = 0;
> +		void *gw = NULL;
> +
> +		/* Only interested in default routes (dst_len == 0) */

No, we copy non-default routes as well.

> +		if (rtm->rtm_dst_len != 0)
> +			return;
> +
> +		for (; RTA_OK(rta, na); rta = RTA_NEXT(rta, na)) {
> +			if (rta->rta_type == RTA_GATEWAY)
> +				gw = RTA_DATA(rta);
> +			else if (rta->rta_type == RTA_OIF)
> +				oif = *(unsigned int *)RTA_DATA(rta);
> +		}
> +
> +		if (!gw)

We copy non-gateway routes too (and may well need to, because there's
typically at least one non-gw route needed to reach the gateway
itself).

> +			return;
> +
> +		/* Only handle our pasta interface */
> +		if (c->mode != MODE_PASTA || oif != c->pasta_ifi)
> +			return;

Again, we need to be listening in the host netns, so pasta_ifi makes
no sense.

> +
> +		if (rtm->rtm_family == AF_INET) {
> +			if (is_new) {
> +				c->ip4.guest_gw = *(struct in_addr *)gw;
> +				c->ip4.our_tap_addr = c->ip4.guest_gw;
> +			} else {
> +				c->ip4.guest_gw = (struct in_addr){ 0 };
> +				c->ip4.our_tap_addr = (struct in_addr){ 0 };
> +			}
> +		} else if (rtm->rtm_family == AF_INET6) {
> +			if (is_new)
> +				c->ip6.guest_gw = *(struct in6_addr *)gw;
> +			else
> +				c->ip6.guest_gw = (struct in6_addr){ 0 };
> +		}
>  	}
>  }
>  
> @@ -398,8 +441,8 @@ void nl_linkaddr_notify_handler(struct ctx *c)
>  static int nl_linkaddr_init_do(void *arg)
>  {
>  	struct sockaddr_nl addr = { .nl_family = AF_NETLINK,
> -		.nl_groups = RTMGRP_LINK | RTMGRP_IPV4_IFADDR |
> -			     RTMGRP_IPV6_IFADDR };
> +		.nl_groups = RTMGRP_LINK | RTMGRP_IPV4_IFADDR | RTMGRP_IPV6_IFADDR |
> +			     RTMGRP_IPV4_ROUTE | RTMGRP_IPV6_ROUTE };
>  
>  	if (arg)
>  		ns_enter((struct ctx *)arg);
> -- 
> 2.51.1
> 

-- 
David Gibson (he or they)	| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you, not the other way
				| around.
http://www.ozlabs.org/~dgibson

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [RFC 01/12] ip: Introduce multi-address data structures for IPv4 and IPv6
  2025-12-15  9:40   ` David Gibson
@ 2025-12-15 22:05     ` Jon Maloy
  0 siblings, 0 replies; 25+ messages in thread
From: Jon Maloy @ 2025-12-15 22:05 UTC (permalink / raw)
  To: passt-dev



On 2025-12-15 04:40, David Gibson wrote:
> On Sun, Dec 14, 2025 at 08:54:30PM -0500, Jon Maloy wrote:
>> As preparation for supporting multiple addresses per interface, we
>> replace the single addr/prefix_len fields with arrays.
>>
>> - We add an ip4_addr_entry and an ip6_addr_entry struct containing
>>    address and prefix length.
>>
>> - We set the array sizes to IP4_MAX_ADDRS=8 and IP6_MAX_ADDRS=16,
>>    respectively.
>>
>> The only functional change is that the IPv6 prefix length now is
>> properly stored instead of being hardcoded to 64 even when set
>> via the -a option.
>>
>> Signed-off-by: Jon Maloy <jmaloy@redhat.com>
>> ---
>>   arp.c    |  4 +--
>>   conf.c   | 97 +++++++++++++++++++++++++++++++++-----------------------
>>   dhcp.c   |  8 ++---
>>   dhcpv6.c |  6 ++--
>>   fwd.c    | 12 +++----
>>   ip.h     | 26 +++++++++++++++
>>   ndp.c    |  6 ++--
>>   passt.h  | 16 ++++++----
>>   pasta.c  | 12 ++++---
>>   tap.c    |  4 +--
>>   util.h   |  1 +
>>   11 files changed, 122 insertions(+), 70 deletions(-)
>>
>> diff --git a/arp.c b/arp.c
>> index bb042e9..7eaf517 100644
>> --- a/arp.c
>> +++ b/arp.c
>> @@ -54,7 +54,7 @@ static bool ignore_arp(const struct ctx *c,
>>   		return true;
>>   
>>   	/* Don't resolve the guest's assigned address, either. */
>> -	if (!memcmp(am->tip, &c->ip4.addr, sizeof(am->tip)))
>> +	if (!memcmp(am->tip, &c->ip4.addrs[0].addr, sizeof(am->tip)))
>>   		return true;
>>   
>>   	return false;
>> @@ -145,7 +145,7 @@ void arp_send_init_req(const struct ctx *c)
>>   	memcpy(req.am.sha,	c->our_tap_mac,		sizeof(req.am.sha));
>>   	memcpy(req.am.sip,	&c->ip4.our_tap_addr,	sizeof(req.am.sip));
>>   	memcpy(req.am.tha,	MAC_BROADCAST,		sizeof(req.am.tha));
>> -	memcpy(req.am.tip,	&c->ip4.addr,		sizeof(req.am.tip));
>> +	memcpy(req.am.tip,	&c->ip4.addrs[0].addr,	sizeof(req.am.tip));
>>   
>>   	debug("Sending initial ARP request for guest MAC address");
>>   	tap_send_single(c, &req, sizeof(req));
>> diff --git a/conf.c b/conf.c
>> index fdc19e8..0e96f36 100644
>> --- a/conf.c
>> +++ b/conf.c
>> @@ -694,10 +694,12 @@ static int conf_ip4_prefix(const char *arg)
>>    * conf_ip4() - Verify or detect IPv4 support, get relevant addresses
>>    * @ifi:	Host interface to attempt (0 to determine one)
>>    * @ip4:	IPv4 context (will be written)
>> + * @permanent:	Mark configured addresses as permanent
>>    *
>>    * Return: interface index for IPv4, or 0 on failure.
>>    */
>> -static unsigned int conf_ip4(unsigned int ifi, struct ip4_ctx *ip4)
>> +static unsigned int conf_ip4(unsigned int ifi, struct ip4_ctx *ip4,
>> +			     bool permanent)
>>   {
>>   	if (!ifi)
>>   		ifi = nl_get_ext_if(nl_sock, AF_INET);
>> @@ -717,33 +719,38 @@ static unsigned int conf_ip4(unsigned int ifi, struct ip4_ctx *ip4)
>>   		}
>>   	}
>>   
>> -	if (IN4_IS_ADDR_UNSPECIFIED(&ip4->addr)) {
>> +	if (!ip4->addr_count) {
>>   		int rc = nl_addr_get(nl_sock, ifi, AF_INET,
>> -				     &ip4->addr, &ip4->prefix_len, NULL);
>> +				     &ip4->addrs[0].addr,
>> +				     &ip4->addrs[0].prefix_len, NULL);
>>   		if (rc < 0) {
>>   			debug("Couldn't discover IPv4 address: %s",
>>   			      strerror_(-rc));
>>   			return 0;
>>   		}
>> +		ip4->addrs[0].permanent = permanent;
>> +		ip4->addr_count = 1;
>>   	}
>>   
>> -	if (!ip4->prefix_len) {
>> -		in_addr_t addr = ntohl(ip4->addr.s_addr);
>> -		if (IN_CLASSA(addr))
>> -			ip4->prefix_len = (32 - IN_CLASSA_NSHIFT);
>> -		else if (IN_CLASSB(addr))
>> -			ip4->prefix_len = (32 - IN_CLASSB_NSHIFT);
>> -		else if (IN_CLASSC(addr))
>> -			ip4->prefix_len = (32 - IN_CLASSC_NSHIFT);
>> +	/* Apply default prefix_len to first address if not set */
>> +	if (!ip4->addrs[0].prefix_len) {
>> +		in_addr_t a = ntohl(ip4->addrs[0].addr.s_addr);
>> +
>> +		if (IN_CLASSA(a))
>> +			ip4->addrs[0].prefix_len = 8;
>> +		else if (IN_CLASSB(a))
>> +			ip4->addrs[0].prefix_len = 16;
>> +		else if (IN_CLASSC(a))
>> +			ip4->addrs[0].prefix_len = 24;
>>   		else
>> -			ip4->prefix_len = 32;
>> +			ip4->addrs[0].prefix_len = 32;
>>   	}
>>   
>> -	ip4->addr_seen = ip4->addr;
>> +	ip4->addr_seen = ip4->addrs[0].addr;
>>   
>>   	ip4->our_tap_addr = ip4->guest_gw;
>>   
>> -	if (IN4_IS_ADDR_UNSPECIFIED(&ip4->addr))
>> +	if (IN4_IS_ADDR_UNSPECIFIED(&ip4->addrs[0].addr))
>>   		return 0;
>>   
>>   	return ifi;
>> @@ -755,9 +762,9 @@ static unsigned int conf_ip4(unsigned int ifi, struct ip4_ctx *ip4)
>>    */
>>   static void conf_ip4_local(struct ip4_ctx *ip4)
>>   {
>> -	ip4->addr_seen = ip4->addr = IP4_LL_GUEST_ADDR;
>> +	ip4->addr_seen = ip4->addrs[0].addr = IP4_LL_GUEST_ADDR;
>>   	ip4->our_tap_addr = ip4->guest_gw = IP4_LL_GUEST_GW;
>> -	ip4->prefix_len = IP4_LL_PREFIX_LEN;
>> +	ip4->addrs[0].prefix_len = IP4_LL_PREFIX_LEN;
>>   
>>   	ip4->no_copy_addrs = ip4->no_copy_routes = true;
>>   }
>> @@ -766,10 +773,12 @@ static void conf_ip4_local(struct ip4_ctx *ip4)
>>    * conf_ip6() - Verify or detect IPv6 support, get relevant addresses
>>    * @ifi:	Host interface to attempt (0 to determine one)
>>    * @ip6:	IPv6 context (will be written)
>> + * @permanent:	Mark discovered addresses as permanent
>>    *
>>    * Return: interface index for IPv6, or 0 on failure.
>>    */
>> -static unsigned int conf_ip6(unsigned int ifi, struct ip6_ctx *ip6)
>> +static unsigned int conf_ip6(unsigned int ifi, struct ip6_ctx *ip6,
>> +			     bool permanent)
>>   {
>>   	int prefix_len = 0;
>>   	int rc;
>> @@ -792,19 +801,25 @@ static unsigned int conf_ip6(unsigned int ifi, struct ip6_ctx *ip6)
>>   	}
>>   
>>   	rc = nl_addr_get(nl_sock, ifi, AF_INET6,
>> -			 IN6_IS_ADDR_UNSPECIFIED(&ip6->addr) ? &ip6->addr : NULL,
>> +			 ip6->addr_count ? NULL : &ip6->addrs[0].addr,
>>   			 &prefix_len, &ip6->our_tap_ll);
>>   	if (rc < 0) {
>>   		debug("Couldn't discover IPv6 address: %s", strerror_(-rc));
>>   		return 0;
>>   	}
>>   
>> -	ip6->addr_seen = ip6->addr;
>> +	if (!ip6->addr_count) {
>> +		ip6->addrs[0].prefix_len = prefix_len ? prefix_len : 64;
>> +		ip6->addrs[0].permanent = permanent;
>> +		ip6->addr_count = 1;
>> +	}
>> +
>> +	ip6->addr_seen = ip6->addrs[0].addr;
>>   
>>   	if (IN6_IS_ADDR_LINKLOCAL(&ip6->guest_gw))
>>   		ip6->our_tap_ll = ip6->guest_gw;
>>   
>> -	if (IN6_IS_ADDR_UNSPECIFIED(&ip6->addr) ||
>> +	if (IN6_IS_ADDR_UNSPECIFIED(&ip6->addrs[0].addr) ||
>>   	    IN6_IS_ADDR_UNSPECIFIED(&ip6->our_tap_ll))
>>   		return 0;
>>   
>> @@ -1149,11 +1164,13 @@ static void conf_print(const struct ctx *c)
>>   		if (!c->no_dhcp) {
>>   			uint32_t mask;
>>   
>> -			mask = htonl(0xffffffff << (32 - c->ip4.prefix_len));
>> +			mask = htonl(0xffffffff <<
>> +				     (32 - c->ip4.addrs[0].prefix_len));
>>   
>>   			info("DHCP:");
>>   			info("    assign: %s",
>> -			     inet_ntop(AF_INET, &c->ip4.addr, buf4, sizeof(buf4)));
>> +			     inet_ntop(AF_INET, &c->ip4.addrs[0].addr,
>> +				       buf4, sizeof(buf4)));
>>   			info("    mask: %s",
>>   			     inet_ntop(AF_INET, &mask,        buf4, sizeof(buf4)));
>>   			info("    router: %s",
>> @@ -1191,7 +1208,8 @@ static void conf_print(const struct ctx *c)
>>   			goto dns6;
>>   
>>   		info("    assign: %s",
>> -		     inet_ntop(AF_INET6, &c->ip6.addr, buf6, sizeof(buf6)));
>> +		     inet_ntop(AF_INET6, &c->ip6.addrs[0].addr,
>> +			       buf6, sizeof(buf6)));
>>   		info("    router: %s",
>>   		     inet_ntop(AF_INET6, &c->ip6.guest_gw, buf6, sizeof(buf6)));
>>   		info("    our link-local: %s",
>> @@ -1812,22 +1830,23 @@ void conf(struct ctx *c, int argc, char **argv)
>>   			break;
>>   		}
>>   		case 'a':
>> -			if (inet_pton(AF_INET6, optarg, &c->ip6.addr)	&&
>> -			    !IN6_IS_ADDR_UNSPECIFIED(&c->ip6.addr)	&&
>> -			    !IN6_IS_ADDR_LOOPBACK(&c->ip6.addr)		&&
>> -			    !IN6_IS_ADDR_V4MAPPED(&c->ip6.addr)		&&
>> -			    !IN6_IS_ADDR_V4COMPAT(&c->ip6.addr)		&&
>> -			    !IN6_IS_ADDR_MULTICAST(&c->ip6.addr)) {
>> +			if (inet_pton(AF_INET6, optarg,
>> +				      &c->ip6.addrs[0].addr) &&
>> +			    !IN6_IS_ADDR_UNSPECIFIED(&c->ip6.addrs[0].addr) &&
>> +			    !IN6_IS_ADDR_LOOPBACK(&c->ip6.addrs[0].addr) &&
>> +			    !IN6_IS_ADDR_V4MAPPED(&c->ip6.addrs[0].addr) &&
>> +			    !IN6_IS_ADDR_V4COMPAT(&c->ip6.addrs[0].addr) &&
>> +			    !IN6_IS_ADDR_MULTICAST(&c->ip6.addrs[0].addr)) {
>>   				if (c->mode == MODE_PASTA)
>>   					c->ip6.no_copy_addrs = true;
>>   				break;
>>   			}
>>   
>> -			if (inet_pton(AF_INET, optarg, &c->ip4.addr)	&&
>> -			    !IN4_IS_ADDR_UNSPECIFIED(&c->ip4.addr)	&&
>> -			    !IN4_IS_ADDR_BROADCAST(&c->ip4.addr)	&&
>> -			    !IN4_IS_ADDR_LOOPBACK(&c->ip4.addr)		&&
>> -			    !IN4_IS_ADDR_MULTICAST(&c->ip4.addr)) {
>> +			if (inet_pton(AF_INET, optarg, &c->ip4.addrs[0].addr) &&
>> +			    !IN4_IS_ADDR_UNSPECIFIED(&c->ip4.addrs[0].addr) &&
>> +			    !IN4_IS_ADDR_BROADCAST(&c->ip4.addrs[0].addr) &&
>> +			    !IN4_IS_ADDR_LOOPBACK(&c->ip4.addrs[0].addr) &&
>> +			    !IN4_IS_ADDR_MULTICAST(&c->ip4.addrs[0].addr)) {
>>   				if (c->mode == MODE_PASTA)
>>   					c->ip4.no_copy_addrs = true;
>>   				break;
>> @@ -1836,8 +1855,8 @@ void conf(struct ctx *c, int argc, char **argv)
>>   			die("Invalid address: %s", optarg);
>>   			break;
>>   		case 'n':
>> -			c->ip4.prefix_len = conf_ip4_prefix(optarg);
>> -			if (c->ip4.prefix_len < 0)
>> +			c->ip4.addrs[0].prefix_len = conf_ip4_prefix(optarg);
>> +			if (c->ip4.addrs[0].prefix_len < 0)
>>   				die("Invalid netmask: %s", optarg);
>>   
>>   			break;
>> @@ -1984,9 +2003,9 @@ void conf(struct ctx *c, int argc, char **argv)
>>   
>>   	nl_sock_init(c, false);
>>   	if (!v6_only)
>> -		c->ifi4 = conf_ip4(ifi4, &c->ip4);
>> +		c->ifi4 = conf_ip4(ifi4, &c->ip4, c->pasta_conf_ns);
>>   	if (!v4_only)
>> -		c->ifi6 = conf_ip6(ifi6, &c->ip6);
>> +		c->ifi6 = conf_ip6(ifi6, &c->ip6, c->pasta_conf_ns);
>>   
>>   	if (c->ifi4 && c->mtu < IPV4_MIN_MTU) {
>>   		warn("MTU %"PRIu16" is too small for IPv4 (minimum %u)",
>> @@ -2125,7 +2144,7 @@ void conf(struct ctx *c, int argc, char **argv)
>>   	if (!c->ifi6) {
>>   		c->no_ndp = 1;
>>   		c->no_dhcpv6 = 1;
>> -	} else if (IN6_IS_ADDR_UNSPECIFIED(&c->ip6.addr)) {
>> +	} else if (IN6_IS_ADDR_UNSPECIFIED(&c->ip6.addrs[0].addr)) {
>>   		c->no_dhcpv6 = 1;
>>   	}
>>   
>> diff --git a/dhcp.c b/dhcp.c
>> index 6b9c2e3..46ef8e3 100644
>> --- a/dhcp.c
>> +++ b/dhcp.c
>> @@ -352,7 +352,7 @@ int dhcp(const struct ctx *c, struct iov_tail *data)
>>   	reply.secs		= 0;
>>   	reply.flags		= m->flags;
>>   	reply.ciaddr		= m->ciaddr;
>> -	reply.yiaddr		= c->ip4.addr;
>> +	reply.yiaddr		= c->ip4.addrs[0].addr;
>>   	reply.siaddr		= 0;
>>   	reply.giaddr		= m->giaddr;
>>   	memcpy(&reply.chaddr,	m->chaddr,	sizeof(reply.chaddr));
>> @@ -404,7 +404,7 @@ int dhcp(const struct ctx *c, struct iov_tail *data)
>>   
>>   	info("    from %s", eth_ntop(m->chaddr, macstr, sizeof(macstr)));
>>   
>> -	mask.s_addr = htonl(0xffffffff << (32 - c->ip4.prefix_len));
>> +	mask.s_addr = htonl(0xffffffff << (32 - c->ip4.addrs[0].prefix_len));
>>   	memcpy(opts[1].s,  &mask,                sizeof(mask));
>>   	memcpy(opts[3].s,  &c->ip4.guest_gw,     sizeof(c->ip4.guest_gw));
>>   	memcpy(opts[54].s, &c->ip4.our_tap_addr, sizeof(c->ip4.our_tap_addr));
>> @@ -412,7 +412,7 @@ int dhcp(const struct ctx *c, struct iov_tail *data)
>>   	/* If the gateway is not on the assigned subnet, send an option 121
>>   	 * (Classless Static Routing) adding a dummy route to it.
>>   	 */
>> -	if ((c->ip4.addr.s_addr & mask.s_addr)
>> +	if ((c->ip4.addrs[0].addr.s_addr & mask.s_addr)
>>   	    != (c->ip4.guest_gw.s_addr & mask.s_addr)) {
>>   		/* a.b.c.d/32:0.0.0.0, 0:a.b.c.d */
>>   		opts[121].slen = 14;
>> @@ -469,7 +469,7 @@ int dhcp(const struct ctx *c, struct iov_tail *data)
>>   	if (m->flags & FLAG_BROADCAST)
>>   		dst = in4addr_broadcast;
>>   	else
>> -		dst = c->ip4.addr;
>> +		dst = c->ip4.addrs[0].addr;
>>   
>>   	tap_udp4_send(c, c->ip4.our_tap_addr, 67, dst, 68, &reply, dlen);
>>   
>> diff --git a/dhcpv6.c b/dhcpv6.c
>> index e4df0db..7eae6a1 100644
>> --- a/dhcpv6.c
>> +++ b/dhcpv6.c
>> @@ -625,7 +625,7 @@ int dhcpv6(struct ctx *c, struct iov_tail *data,
>>   		if (mh->type == TYPE_CONFIRM && server_id)
>>   			return -1;
>>   
>> -		if (dhcpv6_ia_notonlink(data, &c->ip6.addr)) {
>> +		if (dhcpv6_ia_notonlink(data, &c->ip6.addrs[0].addr)) {
>>   
>>   			dhcpv6_send_ia_notonlink(c, data, &client_id_base,
>>   						 ntohs(client_id->l), mh->xid);
>> @@ -679,7 +679,7 @@ int dhcpv6(struct ctx *c, struct iov_tail *data,
>>   
>>   	tap_udp6_send(c, src, 547, tap_ip6_daddr(c, src), 546,
>>   		      mh->xid, &resp, n);
>> -	c->ip6.addr_seen = c->ip6.addr;
>> +	c->ip6.addr_seen = c->ip6.addrs[0].addr;
>>   
>>   	return 1;
>>   }
>> @@ -703,5 +703,5 @@ void dhcpv6_init(const struct ctx *c)
>>   	memcpy(resp_not_on_link.server_id.duid_lladdr,
>>   	       c->our_tap_mac, sizeof(c->our_tap_mac));
>>   
>> -	resp.ia_addr.addr	= c->ip6.addr;
>> +	resp.ia_addr.addr	= c->ip6.addrs[0].addr;
>>   }
>> diff --git a/fwd.c b/fwd.c
>> index 44a0e10..408af30 100644
>> --- a/fwd.c
>> +++ b/fwd.c
>> @@ -516,7 +516,7 @@ static bool fwd_guest_accessible4(const struct ctx *c,
>>   	/* For IPv4, addr_seen is initialised to addr, so is always a valid
>>   	 * address
>>   	 */
>> -	if (IN4_ARE_ADDR_EQUAL(addr, &c->ip4.addr) ||
>> +	if (IN4_ARE_ADDR_EQUAL(addr, &c->ip4.addrs[0].addr) ||
>>   	    IN4_ARE_ADDR_EQUAL(addr, &c->ip4.addr_seen))
>>   		return false;
>>   
>> @@ -537,7 +537,7 @@ static bool fwd_guest_accessible6(const struct ctx *c,
>>   	if (IN6_IS_ADDR_LOOPBACK(addr))
>>   		return false;
>>   
>> -	if (IN6_ARE_ADDR_EQUAL(addr, &c->ip6.addr))
>> +	if (IN6_ARE_ADDR_EQUAL(addr, &c->ip6.addrs[0].addr))
>>   		return false;
>>   
>>   	/* For IPv6, addr_seen starts unspecified, because we don't know what LL
>> @@ -587,9 +587,9 @@ static void nat_outbound(const struct ctx *c, const union inany_addr *addr,
>>   	else if (inany_equals6(addr, &c->ip6.map_host_loopback))
>>   		*translated = inany_loopback6;
>>   	else if (inany_equals4(addr, &c->ip4.map_guest_addr))
>> -		*translated = inany_from_v4(c->ip4.addr);
>> +		*translated = inany_from_v4(c->ip4.addrs[0].addr);
>>   	else if (inany_equals6(addr, &c->ip6.map_guest_addr))
>> -		translated->a6 = c->ip6.addr;
>> +		translated->a6 = c->ip6.addrs[0].addr;
>>   	else
>>   		*translated = *addr;
>>   }
>> @@ -710,10 +710,10 @@ bool nat_inbound(const struct ctx *c, const union inany_addr *addr,
>>   		   inany_equals6(addr, &in6addr_loopback)) {
>>   		translated->a6 = c->ip6.map_host_loopback;
>>   	} else if (!IN4_IS_ADDR_UNSPECIFIED(&c->ip4.map_guest_addr) &&
>> -		   inany_equals4(addr, &c->ip4.addr)) {
>> +		   inany_equals4(addr, &c->ip4.addrs[0].addr)) {
>>   		*translated = inany_from_v4(c->ip4.map_guest_addr);
>>   	} else if (!IN6_IS_ADDR_UNSPECIFIED(&c->ip6.map_guest_addr) &&
>> -		   inany_equals6(addr, &c->ip6.addr)) {
>> +		   inany_equals6(addr, &c->ip6.addrs[0].addr)) {
>>   		translated->a6 = c->ip6.map_guest_addr;
>>   	} else if (fwd_guest_accessible(c, addr)) {
>>   		*translated = *addr;
>> diff --git a/ip.h b/ip.h
>> index 5830b92..748cb1f 100644
>> --- a/ip.h
>> +++ b/ip.h
>> @@ -135,4 +135,30 @@ static const struct in_addr in4addr_broadcast = { 0xffffffff };
>>   #define IPV6_MIN_MTU		1280
>>   #endif
>>   
>> +/* Maximum number of addresses per address family */
>> +#define IP4_MAX_ADDRS		8
>> +#define IP6_MAX_ADDRS		16
>> +
>> +/**
>> + * struct ip4_addr_entry - IPv4 address with prefix length
>> + * @addr:		IPv4 address
>> + * @prefix_len:		Prefix length (netmask bits)
>> + */
>> +struct ip4_addr_entry {
>> +	struct in_addr addr;
>> +	int prefix_len;
>> +	int permanent;
> 
> Might as well make these uint8_t and bool, respectively.  There will
> be some padding, but the overall structure will still be smaller.
> 
> Or, it might be worth considering replacing 'permanent' with a flags
> mask, in case we have future uses for it.

Agree with that. I'll make that change, and if we ever need more we can
replace the boolean with a bitmask.
/jon


> 


^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [RFC 04/12] conf: Apply -n/--netmask to most recently added address
  2025-12-15  9:54   ` David Gibson
@ 2025-12-15 22:43     ` Jon Maloy
  0 siblings, 0 replies; 25+ messages in thread
From: Jon Maloy @ 2025-12-15 22:43 UTC (permalink / raw)
  To: passt-dev



On 2025-12-15 04:54, David Gibson wrote:
> On Sun, Dec 14, 2025 at 08:54:33PM -0500, Jon Maloy wrote:
>> We allow for multiple -n/--netmask options, and update the handling so
>> that each given prefix length is applied to the most recently added
>> address instead of always to addrs[0].
>>
>> This allows per-address prefix configuration, such as:
>>
>>      -a 10.0.0.1 -n 24 -a 10.0.0.2 -n 16
>>
>> If no address has been added yet, -n still applies to addrs[0] for
>> backwards compatibility.
> 
> Huh.  I'd forgotten until looking at this series that we had the -n
> option.  We probably do need this change, but in general I think we'd
> be better off deprecating -n, in favour of allowing a prefix len as
> part of the -a option, e.g.
> 
> 	-a 10.0.0.1/24 -a 10.0.0.2/16
> 
> That could be a smaller change preliminary to this series.

Excellent idea. I really wasn't happy with these semantics, either.
I'll send a separate patch.
/j
> 
>>
>> Signed-off-by: Jon Maloy <jmaloy@redhat.com>
>> ---
>>   conf.c | 9 +++++++--
>>   1 file changed, 7 insertions(+), 2 deletions(-)
>>
>> diff --git a/conf.c b/conf.c
>> index e9f217b..0a4a28a 100644
>> --- a/conf.c
>> +++ b/conf.c
>> @@ -1528,6 +1528,7 @@ void conf(struct ctx *c, int argc, char **argv)
>>   	size_t logsize = 0;
>>   	char *runas = NULL;
>>   	long fd_tap_opt;
>> +	int prefix, idx;
>>   	int name, ret;
>>   	uid_t uid;
>>   	gid_t gid;
>> @@ -1872,10 +1873,14 @@ void conf(struct ctx *c, int argc, char **argv)
>>   			die("Invalid address: %s", optarg);
>>   			break;
>>   		case 'n':
>> -			c->ip4.addrs[0].prefix_len = conf_ip4_prefix(optarg);
>> -			if (c->ip4.addrs[0].prefix_len < 0)
>> +			prefix = conf_ip4_prefix(optarg);
>> +
>> +			if (prefix < 0)
>>   				die("Invalid netmask: %s", optarg);
>>   
>> +			/* Apply to most recent address, or addrs[0] if none yet */
>> +			idx = c->ip4.addr_count ? c->ip4.addr_count - 1 : 0;
>> +			c->ip4.addrs[idx].prefix_len = prefix;
>>   			break;
>>   		case 'M':
>>   			parse_mac(c->our_tap_mac, optarg);
>> -- 
>> 2.51.1
>>
> 


^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [RFC 07/12] netlink: Subscribe to link/address changes in namespace
  2025-12-15 10:32   ` David Gibson
@ 2025-12-15 23:25     ` Jon Maloy
  0 siblings, 0 replies; 25+ messages in thread
From: Jon Maloy @ 2025-12-15 23:25 UTC (permalink / raw)
  To: David Gibson; +Cc: sbrivio, dgibson, passt-dev



On 2025-12-15 05:32, David Gibson wrote:
> On Sun, Dec 14, 2025 at 08:54:36PM -0500, Jon Maloy wrote:
>> We add subscriptions to RTMGRP_LINK, RTMGRP_IPV4_IFADDR, and
>> RTMGRP_IPV6_IFADDR, so that we can receive notifications when link
>> state or addresses change on the namespace interface.
>>
>> When addresses are discovered via netlink:
>>
>> - We mark them as non-permanent, which means they can be modified or
>>    deleted by subsequent events.
>> - We apply the prefix indicated in the notification.
>> - Update addr_seen to track the new address as the active one.
> 
> addr_seen isn't really about an "active" address.  The expectation was
> that the guest would only use a single address, it just might not be
> the one we told it to.
> 
> Now that we're aiming to allow multiple concurrent addresses, we can
> expect the guest to use all of them actively.

Right. This makes the case for having guest/tap side subscriptions, 
since we will now know all addresses he is using, so addr_seen
should become obsolete.


> 
>> This provides the foundation for dynamic address monitoring,
>> and supports runtime network changes.
>>
>> Signed-off-by: Jon Maloy <jmaloy@redhat.com>
>> ---
>>   epoll_type.h |   2 +
>>   netlink.c    | 370 +++++++++++++++++++++++++++++++++++++++++++++++++++
>>   netlink.h    |   3 +
>>   passt.c      |   5 +
>>   passt.h      |   1 +
>>   tap.c        |   6 +-
>>   6 files changed, 384 insertions(+), 3 deletions(-)
>>
>> diff --git a/epoll_type.h b/epoll_type.h
>> index a90ffb6..0a16d94 100644
>> --- a/epoll_type.h
>> +++ b/epoll_type.h
>> @@ -46,6 +46,8 @@ enum epoll_type {
>>   	EPOLL_TYPE_REPAIR,
>>   	/* Netlink neighbour subscription socket */
>>   	EPOLL_TYPE_NL_NEIGH,
>> +	/* Netlink link/address subscription socket */
>> +	EPOLL_TYPE_NL_LINKADDR,
>>   
>>   	EPOLL_NUM_TYPES,
>>   };
>> diff --git a/netlink.c b/netlink.c
>> index 82a2f0c..7492f17 100644
>> --- a/netlink.c
>> +++ b/netlink.c
>> @@ -35,6 +35,9 @@
>>   #include "passt.h"
>>   #include "log.h"
>>   #include "ip.h"
>> +#include "tap.h"
>> +#include "arp.h"
>> +#include "ndp.h"
>>   #include "netlink.h"
>>   #include "epoll_ctl.h"
>>   
>> @@ -59,6 +62,7 @@
>>   int nl_sock		 = -1;
>>   int nl_sock_ns		 = -1;
>>   static int nl_sock_neigh = -1;
>> +static int nl_sock_linkaddr = -1;
>>   static int nl_seq	 = 1;
>>   
>>   /**
>> @@ -91,6 +95,372 @@ static int nl_sock_init_do(void *arg)
>>   	return 0;
>>   }
>>   
>> +/**
>> + * nl_addr4_find() - Find an IPv4 address in the address array
>> + * @c:		Execution context
>> + * @addr:	Address to find
>> + *
>> + * Return: index if found, -1 otherwise
>> + */
>> +static int nl_addr4_find(const struct ctx *c, const struct in_addr *addr)
>> +{
>> +	int i;
>> +
>> +	for (i = 0; i < c->ip4.addr_count; i++)
>> +		if (IN4_ARE_ADDR_EQUAL(&c->ip4.addrs[i].addr, addr))
>> +			return (int)i;
>> +
>> +	return -1;
>> +}
>> +
>> +/**
>> + * nl_addr6_find() - Find an IPv6 address in the address array
>> + * @c:		Execution context
>> + * @addr:	Address to find
>> + *
>> + * Return: index if found, -1 otherwise
>> + */
>> +static int nl_addr6_find(const struct ctx *c, const struct in6_addr *addr)
>> +{
>> +	int i;
>> +
>> +	for (i = 0; i < c->ip6.addr_count; i++)
>> +		if (IN6_ARE_ADDR_EQUAL(&c->ip6.addrs[i].addr, addr))
>> +			return (int)i;
>> +
>> +	return -1;
>> +}
>> +
>> +/**
>> + * nl_addr4_add() - Add a discovered IPv4 address to the address array
>> + * @c:		Execution context
>> + * @addr:	Address to add
>> + * @prefix_len: Prefix length
>> + *
>> + * Return: true if added or updated, false if array full or already permanent
>> + */
>> +static bool nl_addr4_add(struct ctx *c, const struct in_addr *addr,
>> +			 int prefix_len)
>> +{
>> +	int idx = nl_addr4_find(c, addr);
>> +
>> +	if (idx >= 0) {
>> +		/* Address exists - if permanent, don't touch; else update */
>> +		if (c->ip4.addrs[idx].permanent)
>> +			return false;
>> +		c->ip4.addrs[idx].prefix_len = prefix_len;
>> +		return true;
>> +	}
>> +
>> +	/* New address - add if room */
>> +	if (c->ip4.addr_count >= IP4_MAX_ADDRS) {
>> +		debug("IPv4 address array full, ignoring discovered address");
>> +		return false;
>> +	}
>> +
>> +	idx = c->ip4.addr_count++;
>> +	c->ip4.addrs[idx].addr = *addr;
>> +	c->ip4.addrs[idx].prefix_len = prefix_len;
>> +	c->ip4.addrs[idx].permanent = 0;
>> +	return true;
>> +}
>> +
>> +/**
>> + * nl_addr6_add() - Add a discovered IPv6 address to the address array
>> + * @c:		Execution context
>> + * @addr:	Address to add
>> + * @prefix_len: Prefix length
>> + *
>> + * Return: true if added or updated, false if array full or already permanent
>> + */
>> +static bool nl_addr6_add(struct ctx *c, const struct in6_addr *addr,
>> +			 int prefix_len)
>> +{
>> +	int idx = nl_addr6_find(c, addr);
>> +
>> +	if (idx >= 0) {
>> +		/* Address exists - if permanent, don't touch; else update */
>> +		if (c->ip6.addrs[idx].permanent)
>> +			return false;
>> +		c->ip6.addrs[idx].prefix_len = prefix_len;
>> +		return true;
>> +	}
>> +
>> +	/* New address - add if room */
>> +	if (c->ip6.addr_count >= IP6_MAX_ADDRS) {
>> +		debug("IPv6 address array full, ignoring discovered address");
>> +		return false;
>> +	}
>> +
>> +	idx = c->ip6.addr_count++;
>> +	c->ip6.addrs[idx].addr = *addr;
>> +	c->ip6.addrs[idx].prefix_len = prefix_len;
>> +	c->ip6.addrs[idx].permanent = 0;
>> +	return true;
>> +}
>> +
>> +/**
>> + * nl_addr4_del() - Remove an IPv4 address from the array if not permanent
>> + * @c:		Execution context
>> + * @addr:	Address to remove
>> + *
>> + * Return: true if removed, false if not found or permanent
>> + */
>> +static bool nl_addr4_del(struct ctx *c, const struct in_addr *addr)
>> +{
>> +	int i, idx = nl_addr4_find(c, addr);
>> +
>> +	if (idx < 0)
>> +		return false;
>> +
>> +	if (c->ip4.addrs[idx].permanent)
>> +		return false;
>> +
>> +	/* Shift remaining entries down */
>> +	c->ip4.addr_count--;
>> +	for (i = idx; i < c->ip4.addr_count; i++)
>> +		c->ip4.addrs[i] = c->ip4.addrs[i + 1];
>> +
>> +	return true;
>> +}
>> +
>> +/**
>> + * nl_addr6_del() - Remove an IPv6 address from the array if not permanent
>> + * @c:		Execution context
>> + * @addr:	Address to remove
>> + *
>> + * Return: true if removed, false if not found or permanent
>> + */
>> +static bool nl_addr6_del(struct ctx *c, const struct in6_addr *addr)
>> +{
>> +	int i, idx = nl_addr6_find(c, addr);
>> +
>> +	if (idx < 0)
>> +		return false;
>> +
>> +	if (c->ip6.addrs[idx].permanent)
>> +		return false;
>> +
>> +	/* Shift remaining entries down */
>> +	c->ip6.addr_count--;
>> +	for (i = idx; i < c->ip6.addr_count; i++)
>> +		c->ip6.addrs[i] = c->ip6.addrs[i + 1];
>> +
>> +	return true;
>> +}
> 
> All the functions above are more to do with the data structure storing
> the addresses than they are to do with netlink.  Better to move them
> into... maybe ip.c?  And use them from conf.c as well.

Agreed. I'll fix that.

> 
> Given the amount of near-duplication here, maybe it would be better to
> have a single table for v4 and v6 using inany_addr?
Yes.
> 
>> +/**
>> + * nl_linkaddr_msg_read() - Parse and log a netlink link/addr message
>> + * @c:		Execution context
>> + * @nh:	Netlink message header
>> + */
>> +static void nl_linkaddr_msg_read(struct ctx *c, const struct nlmsghdr *nh)
>> +{
>> +	if (nh->nlmsg_type == NLMSG_DONE || nh->nlmsg_type == NLMSG_ERROR)
>> +		return;
>> +
>> +	if (nh->nlmsg_type == RTM_NEWLINK || nh->nlmsg_type == RTM_DELLINK) {
>> +		const struct ifinfomsg *ifm = NLMSG_DATA(nh);
>> +		struct rtattr *rta = IFLA_RTA(ifm);
>> +		size_t na = IFLA_PAYLOAD(nh);
>> +		const char *name = "?";
>> +		bool up = !!(ifm->ifi_flags & IFF_UP);
>> +		bool running = !!(ifm->ifi_flags & IFF_RUNNING);
>> +
>> +		for (; RTA_OK(rta, na); rta = RTA_NEXT(rta, na)) {
>> +			if (rta->rta_type == IFLA_IFNAME) {
>> +				name = (const char *)RTA_DATA(rta);
>> +				break;
>> +			}
>> +		}
>> +
>> +		/* Update pasta interface UP state if this is our interface */
>> +		if (c->mode == MODE_PASTA &&
>> +		    (unsigned int)ifm->ifi_index == c->pasta_ifi) {
>> +			c->pasta_ifi_up = up;
>> +			debug("Interface %s", up ? "UP" : "DOWN");
> 
> This only makes sense if we're listening to netlink messages in the
> guest netns, but the address stuff only makes sense listening to
> messages in the host netns.

See previous response.
>
>> +		}
>> +
>> +		if (nh->nlmsg_type == RTM_NEWLINK)
>> +			debug("Link %s (idx=%d): %s %s", name, ifm->ifi_index,
>> +			     up ? "UP" : "DOWN", running ? "RUNNING" : "");
>> +		else
>> +			debug("Link %s (idx=%d): DELETED", name, ifm->ifi_index);
>> +
>> +		return;
>> +	}
>> +
>> +	if (nh->nlmsg_type == RTM_NEWADDR || nh->nlmsg_type == RTM_DELADDR) {
>> +		bool is_new = (nh->nlmsg_type == RTM_NEWADDR);
>> +		const struct ifaddrmsg *ifa = NLMSG_DATA(nh);
>> +		char addr_str[INET6_ADDRSTRLEN];
>> +		struct rtattr *rta = IFA_RTA(ifa);
>> +		char ifname[IFNAMSIZ] = { 0 };
>> +		size_t na = IFA_PAYLOAD(nh);
>> +		void *addr = NULL;
>> +		for (; RTA_OK(rta, na); rta = RTA_NEXT(rta, na)) {
>> +			if (ifa->ifa_family == AF_INET &&
>> +			    rta->rta_type == IFA_LOCAL) {
>> +				addr = RTA_DATA(rta);
>> +				break;
>> +			} else if (ifa->ifa_family == AF_INET6 &&
>> +				   rta->rta_type == IFA_ADDRESS) {
>> +				addr = RTA_DATA(rta);
>> +				break;
>> +			}
>> +		}
>> +
>> +		if (!addr)
>> +			return;
>> +
>> +		if_indextoname(ifa->ifa_index, ifname);
>> +		inet_ntop(ifa->ifa_family, addr, addr_str, sizeof(addr_str));
>> +
>> +		debug("%s addr on %s (index=%d): %s/%i%s",
>> +		      is_new ? "NEW" : "DEL", ifname, ifa->ifa_index, addr_str,
>> +		      ifa->ifa_prefixlen,
>> +		      tap_is_ready(c) ? " (tap UP)" : " (tap DOWN)");
>> +
>> +		/* Only handle our pasta interface */
>> +		if (c->mode != MODE_PASTA || ifa->ifa_index != c->pasta_ifi)
>> +			return;
> 
> Nope.  This is a host netns event, so comparing to pasta_ifi makes no
> sense.  We _should_ be comparing to ifi4 or ifi6 (depending on address
> family), and we should probably do that before we go parsing the
> details above.
> 
> We should also probably check for --no-copy-addrs here, too.
> 
> In the other direction, even for PASST mode we can store this address
> in our table, it just won't do anything until DHCP or whatever
> consults it.

ok

> 
>> +
>> +		if (ifa->ifa_family == AF_INET) {
>> +			struct in_addr *a = (struct in_addr *)addr;
>> +
>> +			if (!is_new) {
>> +				nl_addr4_del(c, a);
>> +				return;
>> +			}
>> +
>> +			if (nl_addr4_add(c, a, ifa->ifa_prefixlen)) {
>> +				c->ip4.addr_seen = *a;
>> +				if (c->pasta_ifi_up && c->ifi4) {
>> +					debug("Sending ARP");
>> +					arp_send_init_req(c);
> 
> What does this ARP request do?  AFAICT we haven't actually added the
> address in the guest netns yet, so the guest won't respond to the ARP.

The address was set by the guest, so why wouldn't he respond?

> 
>> +				}
>> +			}
>> +		} else if (ifa->ifa_family == AF_INET6) {
>> +			struct in6_addr *a = (struct in6_addr *)addr;
>> +
>> +			if (!is_new) {
>> +				nl_addr6_del(c, a);
>> +				return;
>> +			}
>> +
>> +			if (nl_addr6_add(c, a,
>> +					 ifa->ifa_prefixlen)) {
>> +				c->ip6.addr_seen = *a;
>> +				if (c->pasta_ifi_up &&
>> +				    c->ifi6 && !c->no_ndp) {
>> +					debug("Sending NDP");
>> +					ndp_send_init_req(c);
> 
> Same question with this NDP.
> 
>> +				}
>> +			}
>> +		}
>> +	}
>> +}
>> +
>> +/**
>> + * nl_linkaddr_notify_handler() - Handle events from link/addr notifier socket
>> + * @c:		Execution context
>> + */
>> +void nl_linkaddr_notify_handler(struct ctx *c)
>> +{
>> +	char buf[NLBUFSIZ];
>> +
>> +	for (;;) {
>> +		ssize_t n = recv(nl_sock_linkaddr, buf, sizeof(buf), MSG_DONTWAIT);
>> +		struct nlmsghdr *nh = (struct nlmsghdr *)buf;
>> +
>> +		if (n < 0) {
>> +			if (errno == EINTR)
>> +				continue;
>> +			if (errno != EAGAIN)
>> +				debug("recv() error: %s", strerror_(errno));
>> +			break;
>> +		}
>> +
>> +		debug("Received %zd bytes", n);
>> +
>> +		for (; NLMSG_OK(nh, n); nh = NLMSG_NEXT(nh, n))
>> +			nl_linkaddr_msg_read(c, nh);
>> +	}
>> +}
>> +
>> +/**
>> + * nl_linkaddr_init_do() - Actually create and bind the netlink socket
>> + * @arg:	Execution context (for namespace entry) or NULL
>> + *
>> + * Return: 0 on success, -1 on failure
>> + */
>> +static int nl_linkaddr_init_do(void *arg)
>> +{
>> +	struct sockaddr_nl addr = { .nl_family = AF_NETLINK,
>> +		.nl_groups = RTMGRP_LINK | RTMGRP_IPV4_IFADDR |
>> +			     RTMGRP_IPV6_IFADDR };
>> +
>> +	if (arg)
>> +		ns_enter((struct ctx *)arg);
>> +
>> +	nl_sock_linkaddr = socket(AF_NETLINK, SOCK_RAW | SOCK_CLOEXEC, NETLINK_ROUTE);
> 
> Is there a reason to use an additional socket, rather than adding more
> events to the neighbour listening socket?

None in particular. I'll look into it.

> 
>> +	if (nl_sock_linkaddr < 0) {
>> +		debug("socket() failed: %s", strerror_(errno));
>> +		return -1;
>> +	}
>> +
>> +	if (bind(nl_sock_linkaddr, (struct sockaddr *)&addr, sizeof(addr)) < 0) {
>> +		debug("bind() failed: %s", strerror_(errno));
>> +		close(nl_sock_linkaddr);
>> +		nl_sock_linkaddr = -1;
>> +		return -1;
>> +	}
>> +
>> +	debug("socket fd=%d", nl_sock_linkaddr);
>> +	return 0;
>> +}
>> +
>> +/**
>> + * nl_linkaddr_notify_init() - Initialize link/address change notifier
>> + * @c:		Execution context
>> + *
>> + * Return: 0 on success, -1 on failure
>> + */
>> +int nl_linkaddr_notify_init(const struct ctx *c)
>> +{
>> +	union epoll_ref ref = { .type = EPOLL_TYPE_NL_LINKADDR };
>> +	struct epoll_event ev = { .events = EPOLLIN };
>> +
>> +	if (nl_sock_linkaddr >= 0) {
>> +		debug("notifier already initialized (fd=%d)", nl_sock_linkaddr);
>> +		return 0;
>> +	}
>> +
>> +	/* Open the notifier socket in the namespace for pasta mode,
>> +	 * or in the init namespace otherwise.
> 
> Definitely wrong.  We're trying to watch host addresses so that we can
> copy them to the guest - therefore we always need to watch in the host
> netns.  On pasta there might be reasons to *also* listen in the guest
> netns, but that would want a different handler to do different things.

Hmm. I think I'll wait for your feedback on the remaining patches before 
I comment on this.
/jon


^ permalink raw reply	[flat|nested] 25+ messages in thread

end of thread, other threads:[~2025-12-15 23:26 UTC | newest]

Thread overview: 25+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2025-12-15  1:54 [RFC 00/12] Support for multiple address and late binding Jon Maloy
2025-12-15  1:54 ` [RFC 01/12] ip: Introduce multi-address data structures for IPv4 and IPv6 Jon Maloy
2025-12-15  9:40   ` David Gibson
2025-12-15 22:05     ` Jon Maloy
2025-12-15  9:46   ` David Gibson
2025-12-15  1:54 ` [RFC 02/12] ip: Add ip4_default_prefix_len() helper function for class-based prefix Jon Maloy
2025-12-15  9:41   ` David Gibson
2025-12-15  1:54 ` [RFC 03/12] conf: Allow multiple -a/--address options per address family Jon Maloy
2025-12-15  9:53   ` David Gibson
2025-12-15  1:54 ` [RFC 04/12] conf: Apply -n/--netmask to most recently added address Jon Maloy
2025-12-15  9:54   ` David Gibson
2025-12-15 22:43     ` Jon Maloy
2025-12-15  1:54 ` [RFC 05/12] fwd: Check all configured addresses in guest accessibility functions Jon Maloy
2025-12-15 10:06   ` David Gibson
2025-12-15  1:54 ` [RFC 06/12] arp: Check all configured addresses in ARP filtering Jon Maloy
2025-12-15 10:07   ` David Gibson
2025-12-15  1:54 ` [RFC 07/12] netlink: Subscribe to link/address changes in namespace Jon Maloy
2025-12-15 10:32   ` David Gibson
2025-12-15 23:25     ` Jon Maloy
2025-12-15  1:54 ` [RFC 08/12] netlink: Subscribe to route " Jon Maloy
2025-12-15 10:38   ` David Gibson
2025-12-15  1:54 ` [RFC 09/12] netlink: Add host-side monitoring for late template interface binding Jon Maloy
2025-12-15  1:54 ` [RFC 10/12] netlink: Add host-side route monitoring and propagation Jon Maloy
2025-12-15  1:54 ` [RFC 11/12] netlink: Prevent host route events from overwriting guest-configured gateway Jon Maloy
2025-12-15  1:54 ` [RFC 12/12] netlink: Rename tap interface when late binding discovers template name Jon Maloy

Code repositories for project(s) associated with this public inbox

	https://passt.top/passt

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for IMAP folder(s).