public inbox for passt-dev@passt.top
 help / color / mirror / code / Atom feed
From: Jon Maloy <jmaloy@redhat.com>
To: sbrivio@redhat.com, dgibson@redhat.com,
	david@gibson.dropbear.id.au, jmaloy@redhat.com,
	passt-dev@passt.top
Subject: [PATCH v8 1/8] netlink: add subsciption on changes in NDP/ARP table
Date: Sun, 21 Sep 2025 18:08:58 -0400	[thread overview]
Message-ID: <20250921220905.478621-2-jmaloy@redhat.com> (raw)
In-Reply-To: <20250921220905.478621-1-jmaloy@redhat.com>

The solution to bug https://bugs.passt.top/show_bug.cgi?id=120
requires the ability to translate from an IP address to its
corresponding MAC address in cases where those are present in
the ARP or NDP tables.

To keep track of the contents of these tables we add a netlink
based neighbour subscription feature.

Signed-off-by: Jon Maloy <jmaloy@redhat.com>
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>

---
v3: - Added an attribute contianing NDA_DST to sent message, so
      that we let the kernel do the filtering of the IP address
      and return only one entry.
    - Added interface index to the call signature. Since the only
      interface we know is the template interface, this limits
      the number of hosts that will be seen as 'network segment
      local' from a PASST viewpoint.
v4: - Made loop independent of attribute order.
    - Ignoring L2 addresses which are not of size ETH_ALEN.
v5: - Changed return value of new function, so caller can know if
      a MAC address really was found.
v6: - Removed warning printout which had ended up in the wrong
      commit.
v8: - Changed to neighbour event subscription model

netlink: arp/ndp table subscription
---
 epoll_type.h |   2 +
 netlink.c    | 114 +++++++++++++++++++++++++++++++++++++++++++++++++++
 netlink.h    |   4 ++
 passt.c      |   8 ++++
 4 files changed, 128 insertions(+)

diff --git a/epoll_type.h b/epoll_type.h
index 12ac59b..a90ffb6 100644
--- a/epoll_type.h
+++ b/epoll_type.h
@@ -44,6 +44,8 @@ enum epoll_type {
 	EPOLL_TYPE_REPAIR_LISTEN,
 	/* TCP_REPAIR helper socket */
 	EPOLL_TYPE_REPAIR,
+	/* Netlink neighbour subscription socket */
+	EPOLL_TYPE_NL_NEIGH,
 
 	EPOLL_NUM_TYPES,
 };
diff --git a/netlink.c b/netlink.c
index c436780..1faf3da 100644
--- a/netlink.c
+++ b/netlink.c
@@ -53,6 +53,7 @@
 int nl_sock	= -1;
 int nl_sock_ns	= -1;
 static int nl_seq = 1;
+static int nl_sock_neigh = -1;
 
 /**
  * nl_sock_init_do() - Set up netlink sockets in init or target namespace
@@ -84,6 +85,119 @@ static int nl_sock_init_do(void *arg)
 	return 0;
 }
 
+/**
+ * nl_neigh_subscr_init() - Open a NETLINK_ROUTE socket and subscribe to neighbor events
+ *
+ * Return: 0 on success, -1 on failure
+ */
+int nl_neigh_subscr_init(struct ctx *c)
+{
+	struct epoll_event ev = { 0 };
+	union epoll_ref ref = { .type = EPOLL_TYPE_NL_NEIGH, .fd = 0 };
+
+	struct sockaddr_nl addr = {
+		.nl_family = AF_NETLINK,
+		.nl_groups = RTMGRP_NEIGH,
+	};
+
+	if (nl_sock_neigh >= 0)
+		return 0;
+
+	nl_sock_neigh = socket(AF_NETLINK, SOCK_RAW | SOCK_CLOEXEC, NETLINK_ROUTE);
+	if (nl_sock_neigh < 0)
+		return -1;
+
+	if (bind(nl_sock_neigh, (struct sockaddr *)&addr, sizeof(addr)) < 0) {
+		close(nl_sock_neigh);
+		nl_sock_neigh = -1;
+		return -1;
+	}
+
+	ref.fd = nl_sock_neigh;
+	ev.events = EPOLLIN;
+	ev.data.u64 = ref.u64;
+	if (epoll_ctl(c->epollfd, EPOLL_CTL_ADD, nl_sock_neigh, &ev) == -1) {
+		close(nl_sock_neigh);
+		nl_sock_neigh = -1;
+		return -1;
+	}
+
+	return 0;
+}
+
+/**
+ * nl_neigh_subscr_handler() - Non-blocking drain of pending neighbor updates
+ * @c:	       Execution context
+ */
+void nl_neigh_subscr_handler(struct ctx *c)
+{
+	struct nlmsghdr *nh;
+	char buf[NLBUFSIZ];
+	ssize_t n;
+
+	if (nl_sock_neigh < 0)
+		return;
+
+	for (;;) {
+		n = recv(nl_sock_neigh, buf, sizeof(buf), MSG_DONTWAIT);
+		if (n <= 0)
+			return;
+
+		nh = (struct nlmsghdr *)buf;
+		for (; NLMSG_OK(nh, n); nh = NLMSG_NEXT(nh, n)) {
+			struct ndmsg *ndm = NLMSG_DATA(nh);
+			struct rtattr *rta = (struct rtattr *)(ndm + 1);
+			size_t na = NLMSG_PAYLOAD(nh, sizeof(*ndm));
+			const uint8_t *lladdr = NULL;
+			const void *dst = NULL;
+			size_t lladdr_len = 0;
+			size_t dstlen = 0;
+
+			if (nh->nlmsg_type == NLMSG_DONE ||
+			    nh->nlmsg_type == NLMSG_ERROR)
+				continue;
+
+			if (nh->nlmsg_type != RTM_NEWNEIGH &&
+			    nh->nlmsg_type != RTM_DELNEIGH)
+				continue;
+
+			for (; RTA_OK(rta, na); rta = RTA_NEXT(rta, na)) {
+				switch (rta->rta_type) {
+				case NDA_DST:
+					dst = RTA_DATA(rta);
+					dstlen = RTA_PAYLOAD(rta);
+					break;
+				case NDA_LLADDR:
+					lladdr = RTA_DATA(rta);
+					lladdr_len = RTA_PAYLOAD(rta);
+					break;
+				default:
+					break;
+				}
+			}
+
+			if (!dst)
+				continue;
+
+			if (dstlen != sizeof(struct in_addr) &&
+			    dstlen != sizeof(struct in6_addr))
+				continue;
+
+			char abuf[INET6_ADDRSTRLEN];
+
+			if (dstlen == sizeof(struct in_addr))
+				inet_ntop(AF_INET, dst, abuf, sizeof(abuf));
+			else
+				inet_ntop(AF_INET6, dst, abuf, sizeof(abuf));
+
+			if (nh->nlmsg_type == RTM_NEWNEIGH)
+				debug("neigh: NEW %s lladdr_len=%zu", abuf, lladdr_len);
+			else
+				debug("neigh: DEL %s", abuf);
+		}
+	}
+}
+
 /**
  * nl_sock_init() - Call nl_sock_init_do(), won't return on failure
  * @c:		Execution context
diff --git a/netlink.h b/netlink.h
index b51e99c..a7d3506 100644
--- a/netlink.h
+++ b/netlink.h
@@ -17,6 +17,8 @@ int nl_route_dup(int s_src, unsigned int ifi_src,
 		 int s_dst, unsigned int ifi_dst, sa_family_t af);
 int nl_addr_get(int s, unsigned int ifi, sa_family_t af,
 		void *addr, int *prefix_len, void *addr_l);
+bool nl_neigh_mac_get(int s, const union inany_addr *addr, int ifi,
+		      unsigned char *mac);
 int nl_addr_set(int s, unsigned int ifi, sa_family_t af,
 		const void *addr, int prefix_len);
 int nl_addr_get_ll(int s, unsigned int ifi, struct in6_addr *addr);
@@ -28,5 +30,7 @@ int nl_link_set_mac(int s, unsigned int ifi, const void *mac);
 int nl_link_set_mtu(int s, unsigned int ifi, int mtu);
 int nl_link_set_flags(int s, unsigned int ifi,
 		      unsigned int set, unsigned int change);
+int nl_neigh_subscr_init(struct ctx *c);
+void nl_neigh_subscr_handler(struct ctx *c);
 
 #endif /* NETLINK_H */
diff --git a/passt.c b/passt.c
index 31fbb75..e20bbad 100644
--- a/passt.c
+++ b/passt.c
@@ -53,6 +53,7 @@
 #include "vu_common.h"
 #include "migrate.h"
 #include "repair.h"
+#include "netlink.h"
 
 #define EPOLL_EVENTS		8
 
@@ -79,6 +80,7 @@ char *epoll_type_str[] = {
 	[EPOLL_TYPE_VHOST_KICK]		= "vhost-user kick socket",
 	[EPOLL_TYPE_REPAIR_LISTEN]	= "TCP_REPAIR helper listening socket",
 	[EPOLL_TYPE_REPAIR]		= "TCP_REPAIR helper socket",
+	[EPOLL_TYPE_NL_NEIGH]		= "netlink neighbour subscription socket",
 };
 static_assert(ARRAY_SIZE(epoll_type_str) == EPOLL_NUM_TYPES,
 	      "epoll_type_str[] doesn't match enum epoll_type");
@@ -322,6 +324,9 @@ int main(int argc, char **argv)
 
 	pcap_init(&c);
 
+	if (nl_neigh_subscr_init(&c) < 0)
+		warn("Failed to subscribe to RTMGRP_NEIGH");
+
 	if (!c.foreground) {
 		if ((devnull_fd = open("/dev/null", O_RDWR | O_CLOEXEC)) < 0)
 			die_perror("Failed to open /dev/null");
@@ -414,6 +419,9 @@ loop:
 		case EPOLL_TYPE_REPAIR:
 			repair_handler(&c, eventmask);
 			break;
+		case EPOLL_TYPE_NL_NEIGH:
+			nl_neigh_subscr_handler(&c);
+			break;
 		default:
 			/* Can't happen */
 			ASSERT(0);
-- 
2.50.1


  reply	other threads:[~2025-09-21 22:09 UTC|newest]

Thread overview: 9+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-09-21 22:08 [PATCH v8 0/8] Use true MAC address of LAN local remote hosts Jon Maloy
2025-09-21 22:08 ` Jon Maloy [this message]
2025-09-21 22:08 ` [PATCH v8 2/8] fwd: Add cache table for ARP/NDP contents Jon Maloy
2025-09-21 22:09 ` [PATCH v8 3/8] arp/ndp: respond with true MAC address of LAN local remote hosts Jon Maloy
2025-09-21 22:09 ` [PATCH v8 4/8] flow: add MAC address of LAN local remote hosts to flow Jon Maloy
2025-09-21 22:09 ` [PATCH v8 5/8] udp: forward external source MAC address through tap interface Jon Maloy
2025-09-21 22:09 ` [PATCH v8 6/8] tcp: " Jon Maloy
2025-09-21 22:09 ` [PATCH v8 7/8] tap: change signature of function tap_push_l2h() Jon Maloy
2025-09-21 22:09 ` [PATCH v8 8/8] icmp: let icmp use mac address from flowside structure Jon Maloy

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20250921220905.478621-2-jmaloy@redhat.com \
    --to=jmaloy@redhat.com \
    --cc=david@gibson.dropbear.id.au \
    --cc=dgibson@redhat.com \
    --cc=passt-dev@passt.top \
    --cc=sbrivio@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
Code repositories for project(s) associated with this public inbox

	https://passt.top/passt

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for IMAP folder(s).