public inbox for passt-dev@passt.top
 help / color / mirror / code / Atom feed
From: Jon Maloy <jmaloy@redhat.com>
To: sbrivio@redhat.com, dgibson@redhat.com,
	david@gibson.dropbear.id.au, jmaloy@redhat.com,
	passt-dev@passt.top
Subject: [PATCH v6 2/8] fwd: Added cache table for ARP/NDP contents
Date: Tue,  9 Sep 2025 21:59:13 -0400	[thread overview]
Message-ID: <20250910015919.173414-3-jmaloy@redhat.com> (raw)
In-Reply-To: <20250910015919.173414-1-jmaloy@redhat.com>

We add a cache table to keep partial contents of the kernel ARP/NDP
tables. This way, we drastically reduce the number of netlink calls
to read those tables.

We create undefined cache entries representing non- or not-yet-
existing ARP/NDP entries when needed. We add a short expiration time
to each such entry, so that we can know when to make repeated calls to
the kernel tables in the beginning. We also add an access counter to the
entries, to ensure that the timer becomes longer and the call frequency
abates over time if no ARP/NDP entry shows up.

For regular entries we use a much longer timer, with the purpose to
update the entry in the rare case that a remote host changes its
MAC address.

Signed-off-by: Jon Maloy <jmaloy@redhat.com>

---
v5: - Moved to earlier in series to reduce rebase conflicts
v6: - Sqashed the hash list commit and the FIFO/LRU queue commit
    - Removed hash lookup. We now only use linear lookup in a
      linked list
    - Eliminated dynamic memory allocation.
    - Ensured there is only one call to clock_gettime()
    - Using MAC_ZERO instead of the previously dedicated definitions
---
 conf.c |   1 +
 fwd.c  | 176 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 fwd.h  |   4 ++
 util.c |  12 ++++
 util.h |   1 +
 5 files changed, 194 insertions(+)

diff --git a/conf.c b/conf.c
index f47f48e..27b04d3 100644
--- a/conf.c
+++ b/conf.c
@@ -2122,6 +2122,7 @@ void conf(struct ctx *c, int argc, char **argv)
 		c->udp.fwd_out.mode = fwd_default;
 
 	fwd_scan_ports_init(c);
+	fwd_mac_cache_init();
 
 	if (!c->quiet)
 		conf_print(c);
diff --git a/fwd.c b/fwd.c
index 250cf56..c58ba9f 100644
--- a/fwd.c
+++ b/fwd.c
@@ -19,6 +19,8 @@
 #include <sched.h>
 #include <unistd.h>
 #include <stdio.h>
+#include <time.h>
+#include <netinet/if_ether.h>
 
 #include "util.h"
 #include "ip.h"
@@ -26,6 +28,8 @@
 #include "passt.h"
 #include "lineread.h"
 #include "flow_table.h"
+#include "inany.h"
+#include "netlink.h"
 
 /* Empheral port range: values from RFC 6335 */
 static in_port_t fwd_ephemeral_min = (1 << 15) + (1 << 14);
@@ -33,6 +37,178 @@ static in_port_t fwd_ephemeral_max = NUM_PORTS - 1;
 
 #define PORT_RANGE_SYSCTL	"/proc/sys/net/ipv4/ip_local_port_range"
 
+#define MAC_CACHE_SIZE 128
+#define MAC_CACHE_RENEWAL 3600  /* Refresh entry from ARP/NDP every hour */
+
+/* Partial cache of ARP/NDP table contents */
+struct mac_cache_entry {
+	struct mac_cache_entry *prev;
+	struct mac_cache_entry *next;
+	struct timespec expiry;
+	union inany_addr addr;
+	uint8_t mac[ETH_ALEN];
+	uint16_t access_count;
+};
+
+struct mac_cache_table {
+	struct mac_cache_entry mac_table[MAC_CACHE_SIZE];
+
+	/* FIFO/LRU queue */
+	struct mac_cache_entry *head;
+	struct mac_cache_entry *tail;
+};
+
+static struct mac_cache_table mac_cache;
+const uint8_t undefined_mac[6] = {0, 0, 0, 0, 0, 0};
+
+/**
+ * fwd_mac_cache_unlink() - Unlink entry from LRU queue
+ */
+static void fwd_mac_cache_unlink(struct mac_cache_entry *e)
+{
+	struct mac_cache_table *t = &mac_cache;
+
+	if (e->prev)
+		e->prev->next = e->next;
+	else
+		t->head = e->next;
+
+	if (e->next)
+		e->next->prev = e->prev;
+	else
+		t->tail = e->prev;
+
+	e->prev = e->next = NULL;
+}
+
+/**
+ * fwd_mac_cache_append_tail() - Add entry to tail of LRU queue
+ */
+static void fwd_mac_cache_append_to_tail(struct mac_cache_entry *e)
+{
+	struct mac_cache_table *t = &mac_cache;
+
+	e->next = NULL;
+	e->prev = t->tail;
+	t->tail->next = e;
+	t->tail = e;
+}
+
+/**
+ * fwd_mac_cache_move_to_tail() - Move entry to tail of LRU queue
+ */
+static void fwd_mac_cache_move_to_tail(struct mac_cache_entry *e)
+{
+	struct mac_cache_table *t = &mac_cache;
+
+	if (t->tail == e)
+		return;
+
+	fwd_mac_cache_unlink(e);
+	fwd_mac_cache_append_to_tail(e);
+}
+
+/**
+ * mac_entry_set_expiry() - Set the time for a cache entry to expire
+ * @e:		Cache entry
+ * @expiry:	Expiration time, in seconds from current moment.
+ */
+static void mac_entry_set_expiry(struct mac_cache_entry *e, struct timespec *now, int expiry)
+{
+	e->expiry = *now;
+	e->expiry.tv_sec += expiry;
+}
+
+/**
+ * fwd_mac_cache_find() - Find an entry in the ARP/NDP cache table
+ * @addr:	IPv4 or IPv6 address, used as key for the lookup
+ *
+ * Return: Pointer to the entry on success, NULL on failure.
+ */
+static struct mac_cache_entry *fwd_mac_cache_find(const union inany_addr *addr)
+{
+	const struct mac_cache_table *t = &mac_cache;
+	struct mac_cache_entry *e = t->tail;
+
+	for (e = t->tail; e; e = e->prev) {
+		if (inany_equals(&e->addr, addr))
+			return e;
+	}
+	return NULL;
+}
+
+/**
+ * fwd_neigh_mac_get() - Find a MAC address the ARP/NDP cache table
+ * @c:		Execution context
+ * @addr:	IPv4 or IPv6 address
+ * @ifi:	Interface index
+ * @mac:	Buffer for Ethernet MAC to return, found or default.
+ *
+ * Return: true if real MAC found, false if not found or failure
+ */
+bool fwd_neigh_mac_get(const struct ctx *c, const union inany_addr *addr,
+		       uint8_t *mac)
+{
+	struct mac_cache_entry *e = fwd_mac_cache_find(addr);
+	int ifi = inany_v4(addr) ? c->ifi4 : c->ifi6;
+	bool refresh = false;
+	bool found = false;
+	struct timespec now;
+
+	clock_gettime(CLOCK_MONOTONIC, &now);
+
+	if (e) {
+		refresh = timespec_before(&e->expiry, &now);
+	} else {
+		/* Recycle least recently used entry */
+		e = mac_cache.head;
+		e->addr = *addr;
+		memcpy(e->mac, undefined_mac, ETH_ALEN);
+		e->access_count = 0;
+		refresh = true;
+	}
+
+	if (!refresh) {
+		found = !MAC_IS_ZERO(e->mac);
+	} else {
+		found = nl_neigh_mac_get(nl_sock, addr, ifi, e->mac);
+		mac_entry_set_expiry(e, &now, MAC_CACHE_RENEWAL);
+	}
+
+	if (found) {
+		memcpy(mac, e->mac, ETH_ALEN);
+	} else {
+		/* Do linear back-off from new netlink calls if nothing found */
+		mac_entry_set_expiry(e, &now, e->access_count++);
+		memcpy(mac, c->our_tap_mac, ETH_ALEN);
+	}
+
+	/* Set to most recently used */
+	fwd_mac_cache_move_to_tail(e);
+
+	return found;
+}
+
+/**
+ * fwd_mac_cache_init() - Initiate ARP/NDP cache table
+ */
+void fwd_mac_cache_init(void)
+{
+	struct mac_cache_table *t = &mac_cache;
+	struct mac_cache_entry *e;
+	int i;
+
+	memset(t, 0, sizeof(*t));
+
+	for (i = 0; i < MAC_CACHE_SIZE; i++) {
+		e = &t->mac_table[i];
+		e->prev = (i == 0) ? NULL : &t->mac_table[i - 1];
+		e->next = (i < (MAC_CACHE_SIZE - 1)) ? &t->mac_table[i + 1] : NULL;
+	}
+	t->head = &t->mac_table[0];
+	t->tail = &t->mac_table[MAC_CACHE_SIZE - 1];
+}
+
 /** fwd_probe_ephemeral() - Determine what ports this host considers ephemeral
  *
  * Work out what ports the host thinks are emphemeral and record it for later
diff --git a/fwd.h b/fwd.h
index 65c7c96..728601f 100644
--- a/fwd.h
+++ b/fwd.h
@@ -57,4 +57,8 @@ uint8_t fwd_nat_from_splice(const struct ctx *c, uint8_t proto,
 uint8_t fwd_nat_from_host(const struct ctx *c, uint8_t proto,
 			  const struct flowside *ini, struct flowside *tgt);
 
+bool fwd_neigh_mac_get(const struct ctx *c, const union inany_addr *addr,
+		       uint8_t *mac);
+void fwd_mac_cache_init(void);
+
 #endif /* FWD_H */
diff --git a/util.c b/util.c
index c492f90..8a8846c 100644
--- a/util.c
+++ b/util.c
@@ -305,6 +305,18 @@ long timespec_diff_ms(const struct timespec *a, const struct timespec *b)
 	return timespec_diff_us(a, b) / 1000;
 }
 
+/**
+ * timespec_before() - Check the relation between two points in time
+ * @a:		Point in time to be tested
+ * @b:		Point in time test a against
+ * Return: True if a comes before b, otherwise b
+ */
+bool timespec_before(const struct timespec *a, const struct timespec *b)
+{
+	return (a->tv_sec < b->tv_sec) ||
+	       (a->tv_sec == b->tv_sec && a->tv_nsec < b->tv_nsec);
+}
+
 /**
  * bitmap_set() - Set single bit in bitmap
  * @map:	Pointer to bitmap
diff --git a/util.h b/util.h
index 2a8c38f..5ec3f22 100644
--- a/util.h
+++ b/util.h
@@ -207,6 +207,7 @@ int sock_unix(char *sock_path);
 void sock_probe_mem(struct ctx *c);
 long timespec_diff_ms(const struct timespec *a, const struct timespec *b);
 int64_t timespec_diff_us(const struct timespec *a, const struct timespec *b);
+bool timespec_before(const struct timespec *a, const struct timespec *b);
 void bitmap_set(uint8_t *map, unsigned bit);
 void bitmap_clear(uint8_t *map, unsigned bit);
 bool bitmap_isset(const uint8_t *map, unsigned bit);
-- 
@@ -207,6 +207,7 @@ int sock_unix(char *sock_path);
 void sock_probe_mem(struct ctx *c);
 long timespec_diff_ms(const struct timespec *a, const struct timespec *b);
 int64_t timespec_diff_us(const struct timespec *a, const struct timespec *b);
+bool timespec_before(const struct timespec *a, const struct timespec *b);
 void bitmap_set(uint8_t *map, unsigned bit);
 void bitmap_clear(uint8_t *map, unsigned bit);
 bool bitmap_isset(const uint8_t *map, unsigned bit);
-- 
2.50.1


  parent reply	other threads:[~2025-09-10  1:59 UTC|newest]

Thread overview: 14+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-09-10  1:59 [PATCH v6 0/8] Use true MAC address of LAN local remote hosts Jon Maloy
2025-09-10  1:59 ` [PATCH v6 1/8] netlink: add function to extract MAC addresses from NDP/ARP table Jon Maloy
2025-09-10  1:59 ` Jon Maloy [this message]
2025-09-10  2:48   ` [PATCH v6 2/8] fwd: Added cache table for ARP/NDP contents David Gibson
2025-09-10  1:59 ` [PATCH v6 3/8] arp/ndp: respond with true MAC address of LAN local remote hosts Jon Maloy
2025-09-10  2:50   ` David Gibson
2025-09-10  1:59 ` [PATCH v6 4/8] flow: add MAC address of LAN local remote hosts to flow Jon Maloy
2025-09-10  1:59 ` [PATCH v6 5/8] udp: forward external source MAC address through tap interface Jon Maloy
2025-09-10  1:59 ` [PATCH v6 6/8] tcp: " Jon Maloy
2025-09-10  2:53   ` David Gibson
2025-09-10  1:59 ` [PATCH v6 7/8] tap: change signature of function tap_push_l2h() Jon Maloy
2025-09-10  2:54   ` David Gibson
2025-09-10  1:59 ` [PATCH v6 8/8] icmp: let icmp use mac address from flowside structure Jon Maloy
2025-09-10  2:57   ` David Gibson

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20250910015919.173414-3-jmaloy@redhat.com \
    --to=jmaloy@redhat.com \
    --cc=david@gibson.dropbear.id.au \
    --cc=dgibson@redhat.com \
    --cc=passt-dev@passt.top \
    --cc=sbrivio@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
Code repositories for project(s) associated with this public inbox

	https://passt.top/passt

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for IMAP folder(s).