public inbox for passt-dev@passt.top
 help / color / mirror / code / Atom feed
* [PATCH v2 0/8] use true mac address of LAN local remote hosts
@ 2025-06-12  4:21 Jon Maloy
  2025-06-12  4:21 ` [PATCH v2 1/8] netlink: Add function to extract mac addresses from arp table Jon Maloy
                   ` (7 more replies)
  0 siblings, 8 replies; 18+ messages in thread
From: Jon Maloy @ 2025-06-12  4:21 UTC (permalink / raw)
  To: sbrivio, dgibson, jmaloy, passt-dev

Bug #120 asks us to use the true MAC addresses of LAN local
remote hosts, since some programs need this information.
These commits introduces this for ARP, NDP, UDP, TCP and
ICMP.

Jon Maloy (8):
  netlink: Add function to extract mac addresses from arp table
  arp: respond with true mac address of LAN local remote hosts
  flow: add mac address of LAN local remote hosts to flow
  udp: forward external source mac address through tap interface
  tcp: forward external source mac address through tap interface
  tap: change signature of function tap_push_l2h()
  tcp: make tcp_rst_no_conn() respond with correct mac address
  icmp: let icmp use mac address from flowside structure

 arp.c          |  9 ++++++++
 flow.c         | 13 ++++++++++-
 flow.h         |  2 ++
 fwd.c          |  2 +-
 fwd.h          |  3 ++-
 icmp.c         |  4 ++--
 ndp.c          | 13 ++++++++++-
 netlink.c      | 58 ++++++++++++++++++++++++++++++++++++++++++++++++++
 netlink.h      |  1 +
 tap.c          | 24 +++++++++++++--------
 tap.h          |  7 +++---
 tcp.c          | 20 ++++++++++++++---
 tcp_buf.c      | 27 +++++++++++------------
 tcp_internal.h |  2 +-
 tcp_vu.c       |  5 ++---
 udp.c          | 33 +++++++++++++---------------
 16 files changed, 166 insertions(+), 57 deletions(-)

-- 
2.48.1


^ permalink raw reply	[flat|nested] 18+ messages in thread

* [PATCH v2 1/8] netlink: Add function to extract mac addresses from arp table
  2025-06-12  4:21 [PATCH v2 0/8] use true mac address of LAN local remote hosts Jon Maloy
@ 2025-06-12  4:21 ` Jon Maloy
  2025-06-12 15:17   ` Stefano Brivio
  2025-06-12  4:21 ` [PATCH v2 2/8] arp: respond with true mac address of LAN local remote hosts Jon Maloy
                   ` (6 subsequent siblings)
  7 siblings, 1 reply; 18+ messages in thread
From: Jon Maloy @ 2025-06-12  4:21 UTC (permalink / raw)
  To: sbrivio, dgibson, jmaloy, passt-dev

The solution to bug #120 requires the ability to translate from
an IP address to its corresponding MAC address in cases where
those are present in the ARP/NTP table.

We add this feature here.

Signed-off-by: Jon Maloy <jmaloy@redhat.com>
---
 netlink.c | 58 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
 netlink.h |  1 +
 2 files changed, 59 insertions(+)

diff --git a/netlink.c b/netlink.c
index ee9325a..6c55c4c 100644
--- a/netlink.c
+++ b/netlink.c
@@ -800,6 +800,64 @@ int nl_addr_get(int s, unsigned int ifi, sa_family_t af,
 	return status;
 }
 
+/**
+ * nl_mac_get() - Get mac address corresponding to given IP address
+ * @s:		Netlink socket
+ * @addr:	IPv4 or IPv6 address
+ * @mac:	Array to place the returned MAC address
+ *
+ * Return: 0 on success, negative error code on failure
+ */
+int nl_mac_get(int s, const union inany_addr *addr, unsigned char *mac)
+{
+	struct nlmsghdr *nlh;
+	char buf[NLBUFSIZ];
+	const void *ip;
+	ssize_t status;
+	uint32_t seq;
+	int alen;
+	struct {
+		struct nlmsghdr nlh;
+		struct ndmsg ndm;
+	} req;
+
+	if (IN6_IS_ADDR_V4MAPPED(&addr->a6)) {
+		ip = &addr->v4mapped.a4;
+		alen = sizeof(struct in_addr);
+		req.ndm.ndm_family = AF_INET;
+	} else {
+		ip = &addr->a6;
+		alen = sizeof(struct in6_addr);
+		req.ndm.ndm_family = AF_INET6;
+	}
+
+	seq = nl_send(s, &req, RTM_GETNEIGH, NLM_F_DUMP, sizeof(req));
+	nl_foreach_oftype(nlh, status, s, buf, seq, RTM_NEWNEIGH) {
+		struct ndmsg *ndm = NLMSG_DATA(nlh);
+		struct rtattr *attr = (struct rtattr *)(ndm + 1);
+		int attrlen = nlh->nlmsg_len - NLMSG_LENGTH(sizeof(*ndm));
+		unsigned char *lladdr = NULL;
+		void *neigh_ip = NULL;
+
+		for (; RTA_OK(attr, attrlen); attr = RTA_NEXT(attr, attrlen)) {
+			if (attr->rta_type == NDA_DST)
+				neigh_ip = RTA_DATA(attr);
+			else if (attr->rta_type == NDA_LLADDR)
+				lladdr = RTA_DATA(attr);
+		}
+
+		if (!neigh_ip || !lladdr)
+			continue;
+
+		if (!memcmp(neigh_ip, ip, alen)) {
+			memcpy(mac, lladdr, ETH_ALEN);
+			return 0;
+		}
+	}
+
+	return status;
+}
+
 /**
  * nl_addr_get_ll() - Get first IPv6 link-local address for a given interface
  * @s:		Netlink socket
diff --git a/netlink.h b/netlink.h
index b51e99c..2f674d7 100644
--- a/netlink.h
+++ b/netlink.h
@@ -17,6 +17,7 @@ int nl_route_dup(int s_src, unsigned int ifi_src,
 		 int s_dst, unsigned int ifi_dst, sa_family_t af);
 int nl_addr_get(int s, unsigned int ifi, sa_family_t af,
 		void *addr, int *prefix_len, void *addr_l);
+int nl_mac_get(int s, const union inany_addr *addr, unsigned char *mac);
 int nl_addr_set(int s, unsigned int ifi, sa_family_t af,
 		const void *addr, int prefix_len);
 int nl_addr_get_ll(int s, unsigned int ifi, struct in6_addr *addr);
-- 
@@ -17,6 +17,7 @@ int nl_route_dup(int s_src, unsigned int ifi_src,
 		 int s_dst, unsigned int ifi_dst, sa_family_t af);
 int nl_addr_get(int s, unsigned int ifi, sa_family_t af,
 		void *addr, int *prefix_len, void *addr_l);
+int nl_mac_get(int s, const union inany_addr *addr, unsigned char *mac);
 int nl_addr_set(int s, unsigned int ifi, sa_family_t af,
 		const void *addr, int prefix_len);
 int nl_addr_get_ll(int s, unsigned int ifi, struct in6_addr *addr);
-- 
2.48.1


^ permalink raw reply related	[flat|nested] 18+ messages in thread

* [PATCH v2 2/8] arp: respond with true mac address of LAN local remote hosts
  2025-06-12  4:21 [PATCH v2 0/8] use true mac address of LAN local remote hosts Jon Maloy
  2025-06-12  4:21 ` [PATCH v2 1/8] netlink: Add function to extract mac addresses from arp table Jon Maloy
@ 2025-06-12  4:21 ` Jon Maloy
  2025-06-12 15:17   ` Stefano Brivio
  2025-06-12  4:21 ` [PATCH v2 3/8] flow: add mac address of LAN local remote hosts to flow Jon Maloy
                   ` (5 subsequent siblings)
  7 siblings, 1 reply; 18+ messages in thread
From: Jon Maloy @ 2025-06-12  4:21 UTC (permalink / raw)
  To: sbrivio, dgibson, jmaloy, passt-dev

When we receive an ARP or NTP request over the tap interface for a
host on the local network, we respond with that host's real mac
address.

The local host, which is acting as a proxy for the default gateway,
is still exempted from this rule.

Signed-off-by: Jon Maloy <jmaloy@redhat.com>
---
 arp.c |  9 +++++++++
 fwd.c |  2 +-
 fwd.h |  3 ++-
 ndp.c | 11 +++++++++++
 4 files changed, 23 insertions(+), 2 deletions(-)

diff --git a/arp.c b/arp.c
index fc482bb..fbd8219 100644
--- a/arp.c
+++ b/arp.c
@@ -29,6 +29,7 @@
 #include "dhcp.h"
 #include "passt.h"
 #include "tap.h"
+#include "netlink.h"
 
 /**
  * arp() - Check if this is a supported ARP message, reply as needed
@@ -39,6 +40,8 @@
  */
 int arp(const struct ctx *c, const struct pool *p)
 {
+	union inany_addr translated;
+	union inany_addr addr;
 	unsigned char swap[4];
 	struct ethhdr *eh;
 	struct arphdr *ah;
@@ -72,6 +75,12 @@ int arp(const struct ctx *c, const struct pool *p)
 	memcpy(am->tha,		am->sha,	sizeof(am->tha));
 	memcpy(am->sha,		c->our_tap_mac,	sizeof(am->sha));
 
+	/* If remote host on local network - respond with its mac address */
+	inany_from_af(&addr, AF_INET, am->tip);
+	nat_outbound(c, &addr, &translated);
+	if (!memcmp(&addr, &translated, sizeof(addr)))
+		nl_mac_get(nl_sock, &addr, am->sha);
+
 	memcpy(swap,		am->tip,	sizeof(am->tip));
 	memcpy(am->tip,		am->sip,	sizeof(am->tip));
 	memcpy(am->sip,		swap,		sizeof(am->sip));
diff --git a/fwd.c b/fwd.c
index 250cf56..02ebc9d 100644
--- a/fwd.c
+++ b/fwd.c
@@ -332,7 +332,7 @@ static bool fwd_guest_accessible(const struct ctx *c,
  * Only handles translations that depend *only* on the address.  Anything
  * related to specific ports or flows is handled elsewhere.
  */
-static void nat_outbound(const struct ctx *c, const union inany_addr *addr,
+void nat_outbound(const struct ctx *c, const union inany_addr *addr,
 			 union inany_addr *translated)
 {
 	if (inany_equals4(addr, &c->ip4.map_host_loopback))
diff --git a/fwd.h b/fwd.h
index 0458a3c..61632f2 100644
--- a/fwd.h
+++ b/fwd.h
@@ -56,5 +56,6 @@ uint8_t fwd_nat_from_splice(const struct ctx *c, uint8_t proto,
 			    const struct flowside *ini, struct flowside *tgt);
 uint8_t fwd_nat_from_host(const struct ctx *c, uint8_t proto,
 			  const struct flowside *ini, struct flowside *tgt);
-
+void nat_outbound(const struct ctx *c, const union inany_addr *addr,
+		  union inany_addr *translated);
 #endif /* FWD_H */
diff --git a/ndp.c b/ndp.c
index 3e15494..d702f37 100644
--- a/ndp.c
+++ b/ndp.c
@@ -32,6 +32,7 @@
 #include "passt.h"
 #include "tap.h"
 #include "log.h"
+#include "netlink.h"
 
 #define	RT_LIFETIME	65535
 
@@ -196,6 +197,9 @@ static void ndp_send(const struct ctx *c, const struct in6_addr *dst,
 static void ndp_na(const struct ctx *c, const struct in6_addr *dst,
 	    const struct in6_addr *addr)
 {
+	union inany_addr translated;
+	union inany_addr addr_any;
+
 	struct ndp_na na = {
 		.ih = {
 			.icmp6_type		= NA,
@@ -215,6 +219,13 @@ static void ndp_na(const struct ctx *c, const struct in6_addr *dst,
 
 	memcpy(na.target_l2_addr.mac, c->our_tap_mac, ETH_ALEN);
 
+	/* If remote host on local network - respond with its mac address */
+	inany_from_af(&addr_any, AF_INET6, addr);
+	nat_outbound(c, &addr_any, &translated);
+
+	if (!memcmp(&addr_any, &translated, sizeof(addr_any)))
+		nl_mac_get(nl_sock, &addr_any, na.target_l2_addr.mac);
+
 	ndp_send(c, dst, &na, sizeof(na));
 }
 
-- 
@@ -32,6 +32,7 @@
 #include "passt.h"
 #include "tap.h"
 #include "log.h"
+#include "netlink.h"
 
 #define	RT_LIFETIME	65535
 
@@ -196,6 +197,9 @@ static void ndp_send(const struct ctx *c, const struct in6_addr *dst,
 static void ndp_na(const struct ctx *c, const struct in6_addr *dst,
 	    const struct in6_addr *addr)
 {
+	union inany_addr translated;
+	union inany_addr addr_any;
+
 	struct ndp_na na = {
 		.ih = {
 			.icmp6_type		= NA,
@@ -215,6 +219,13 @@ static void ndp_na(const struct ctx *c, const struct in6_addr *dst,
 
 	memcpy(na.target_l2_addr.mac, c->our_tap_mac, ETH_ALEN);
 
+	/* If remote host on local network - respond with its mac address */
+	inany_from_af(&addr_any, AF_INET6, addr);
+	nat_outbound(c, &addr_any, &translated);
+
+	if (!memcmp(&addr_any, &translated, sizeof(addr_any)))
+		nl_mac_get(nl_sock, &addr_any, na.target_l2_addr.mac);
+
 	ndp_send(c, dst, &na, sizeof(na));
 }
 
-- 
2.48.1


^ permalink raw reply related	[flat|nested] 18+ messages in thread

* [PATCH v2 3/8] flow: add mac address of LAN local remote hosts to flow
  2025-06-12  4:21 [PATCH v2 0/8] use true mac address of LAN local remote hosts Jon Maloy
  2025-06-12  4:21 ` [PATCH v2 1/8] netlink: Add function to extract mac addresses from arp table Jon Maloy
  2025-06-12  4:21 ` [PATCH v2 2/8] arp: respond with true mac address of LAN local remote hosts Jon Maloy
@ 2025-06-12  4:21 ` Jon Maloy
  2025-06-12 15:17   ` Stefano Brivio
  2025-06-12  4:21 ` [PATCH v2 4/8] udp: forward external source mac address through tap interface Jon Maloy
                   ` (4 subsequent siblings)
  7 siblings, 1 reply; 18+ messages in thread
From: Jon Maloy @ 2025-06-12  4:21 UTC (permalink / raw)
  To: sbrivio, dgibson, jmaloy, passt-dev

When communicating with remote hosts on the local network, some guest
applications want to see the real mac address of that host instead
of passt/pasta's own tap address. The flowside structure is a convenient
location for storing that address, so we do that in this commit.

Note that we don´t add usage of this address in this commit, - that
will come in later commits.

Signed-off-by: Jon Maloy <jmaloy@redhat.com>
---
 flow.c | 13 ++++++++++++-
 flow.h |  2 ++
 2 files changed, 14 insertions(+), 1 deletion(-)

diff --git a/flow.c b/flow.c
index da5c813..fffc817 100644
--- a/flow.c
+++ b/flow.c
@@ -20,6 +20,7 @@
 #include "flow.h"
 #include "flow_table.h"
 #include "repair.h"
+#include "netlink.h"
 
 const char *flow_state_str[] = {
 	[FLOW_STATE_FREE]	= "FREE",
@@ -438,7 +439,7 @@ struct flowside *flow_target(const struct ctx *c, union flow *flow,
 {
 	char estr[INANY_ADDRSTRLEN], fstr[INANY_ADDRSTRLEN];
 	struct flow_common *f = &flow->f;
-	const struct flowside *ini = &f->side[INISIDE];
+	struct flowside *ini = &f->side[INISIDE];
 	struct flowside *tgt = &f->side[TGTSIDE];
 	uint8_t tgtpif = PIF_NONE;
 
@@ -446,10 +447,16 @@ struct flowside *flow_target(const struct ctx *c, union flow *flow,
 	ASSERT(f->type == FLOW_TYPE_NONE);
 	ASSERT(f->pif[INISIDE] != PIF_NONE && f->pif[TGTSIDE] == PIF_NONE);
 	ASSERT(flow->f.state == FLOW_STATE_INI);
+	memcpy(ini->mac, c->our_tap_mac, ETH_ALEN);
+	memcpy(tgt->mac, c->our_tap_mac, ETH_ALEN);
 
 	switch (f->pif[INISIDE]) {
 	case PIF_TAP:
 		tgtpif = fwd_nat_from_tap(c, proto, ini, tgt);
+
+		/* If remote host on local network - insert its mac address */
+		if (!memcmp(&tgt->eaddr, &ini->oaddr, sizeof(ini->oaddr)))
+			nl_mac_get(nl_sock, &ini->oaddr, ini->mac);
 		break;
 
 	case PIF_SPLICE:
@@ -458,6 +465,10 @@ struct flowside *flow_target(const struct ctx *c, union flow *flow,
 
 	case PIF_HOST:
 		tgtpif = fwd_nat_from_host(c, proto, ini, tgt);
+
+		/* If remote host on local network - insert its mac address */
+		if (!memcmp(&tgt->oaddr, &ini->eaddr, sizeof(ini->eaddr)))
+			nl_mac_get(nl_sock, &tgt->oaddr, tgt->mac);
 		break;
 
 	default:
diff --git a/flow.h b/flow.h
index cac618a..916951b 100644
--- a/flow.h
+++ b/flow.h
@@ -143,12 +143,14 @@ extern const uint8_t flow_proto[];
  * @oaddr:	Our address (local address from passt's PoV)
  * @eport:	Endpoint port
  * @oport:	Our port
+ * @mac:	MAC address of remote endpoint
  */
 struct flowside {
 	union inany_addr	oaddr;
 	union inany_addr	eaddr;
 	in_port_t		oport;
 	in_port_t		eport;
+	unsigned char 		mac[6];
 };
 
 /**
-- 
@@ -143,12 +143,14 @@ extern const uint8_t flow_proto[];
  * @oaddr:	Our address (local address from passt's PoV)
  * @eport:	Endpoint port
  * @oport:	Our port
+ * @mac:	MAC address of remote endpoint
  */
 struct flowside {
 	union inany_addr	oaddr;
 	union inany_addr	eaddr;
 	in_port_t		oport;
 	in_port_t		eport;
+	unsigned char 		mac[6];
 };
 
 /**
-- 
2.48.1


^ permalink raw reply related	[flat|nested] 18+ messages in thread

* [PATCH v2 4/8] udp: forward external source mac address through tap interface
  2025-06-12  4:21 [PATCH v2 0/8] use true mac address of LAN local remote hosts Jon Maloy
                   ` (2 preceding siblings ...)
  2025-06-12  4:21 ` [PATCH v2 3/8] flow: add mac address of LAN local remote hosts to flow Jon Maloy
@ 2025-06-12  4:21 ` Jon Maloy
  2025-06-12 15:17   ` Stefano Brivio
  2025-06-12  4:21 ` [PATCH v2 5/8] tcp: " Jon Maloy
                   ` (3 subsequent siblings)
  7 siblings, 1 reply; 18+ messages in thread
From: Jon Maloy @ 2025-06-12  4:21 UTC (permalink / raw)
  To: sbrivio, dgibson, jmaloy, passt-dev

We forward the incoming mac address through the tap interface when
receiving incoming packets from network local hosts. Packets from
the local host are excepted from this rule, and are still forwarded
with the default passt/pasta mac address as source.

This is a part of the solution to bug #120

Signed-off-by: Jon Maloy <jmaloy@redhat.com>
---
 udp.c | 29 +++++++++++++----------------
 1 file changed, 13 insertions(+), 16 deletions(-)

diff --git a/udp.c b/udp.c
index 65a52e0..ae8fbaf 100644
--- a/udp.c
+++ b/udp.c
@@ -133,11 +133,8 @@ static int udp_splice_init[IP_VERSIONS][NUM_PORTS];
 /* UDP header and data for inbound messages */
 static struct udp_payload_t udp_payload[UDP_MAX_FRAMES];
 
-/* Ethernet header for IPv4 frames */
-static struct ethhdr udp4_eth_hdr;
-
-/* Ethernet header for IPv6 frames */
-static struct ethhdr udp6_eth_hdr;
+/* Ethernet headers for IPv4 and IPv6 frames */
+static struct ethhdr udp_eth_hdr[UDP_MAX_FRAMES];
 
 /**
  * struct udp_meta_t - Pre-cooked headers for UDP packets
@@ -214,8 +211,10 @@ void udp_portmap_clear(void)
  */
 void udp_update_l2_buf(const unsigned char *eth_d, const unsigned char *eth_s)
 {
-	eth_update_mac(&udp4_eth_hdr, eth_d, eth_s);
-	eth_update_mac(&udp6_eth_hdr, eth_d, eth_s);
+	int i;
+
+	for (i = 0; i < UDP_MAX_FRAMES; i++)
+		eth_update_mac(&udp_eth_hdr[i], eth_d, eth_s);
 }
 
 /**
@@ -238,6 +237,7 @@ static void udp_iov_init_one(const struct ctx *c, size_t i)
 
 	*siov = IOV_OF_LVALUE(payload->data);
 
+	tiov[UDP_IOV_ETH] = IOV_OF_LVALUE(udp_eth_hdr[i]);
 	tiov[UDP_IOV_TAP] = tap_hdr_iov(c, &meta->taph);
 	tiov[UDP_IOV_PAYLOAD].iov_base = payload;
 
@@ -253,9 +253,6 @@ static void udp_iov_init(const struct ctx *c)
 {
 	size_t i;
 
-	udp4_eth_hdr.h_proto = htons_constant(ETH_P_IP);
-	udp6_eth_hdr.h_proto = htons_constant(ETH_P_IPV6);
-
 	for (i = 0; i < UDP_MAX_FRAMES; i++)
 		udp_iov_init_one(c, i);
 }
@@ -362,21 +359,21 @@ static void udp_tap_prepare(const struct mmsghdr *mmh,
 	struct iovec (*tap_iov)[UDP_NUM_IOVS] = &udp_l2_iov[idx];
 	struct udp_payload_t *bp = &udp_payload[idx];
 	struct udp_meta_t *bm = &udp_meta[idx];
+	struct ethhdr *eh = (*tap_iov)[UDP_IOV_ETH].iov_base;
 	size_t l4len;
 
+	eth_update_mac(eh, 0, toside->mac);
 	if (!inany_v4(&toside->eaddr) || !inany_v4(&toside->oaddr)) {
 		l4len = udp_update_hdr6(&bm->ip6h, bp, toside,
 					mmh[idx].msg_len, no_udp_csum);
-		tap_hdr_update(&bm->taph, l4len + sizeof(bm->ip6h) +
-			       sizeof(udp6_eth_hdr));
-		(*tap_iov)[UDP_IOV_ETH] = IOV_OF_LVALUE(udp6_eth_hdr);
+		tap_hdr_update(&bm->taph, l4len + sizeof(bm->ip6h) + ETH_HLEN);
+		eh->h_proto = htons_constant(ETH_P_IPV6);
 		(*tap_iov)[UDP_IOV_IP] = IOV_OF_LVALUE(bm->ip6h);
 	} else {
 		l4len = udp_update_hdr4(&bm->ip4h, bp, toside,
 					mmh[idx].msg_len, no_udp_csum);
-		tap_hdr_update(&bm->taph, l4len + sizeof(bm->ip4h) +
-			       sizeof(udp4_eth_hdr));
-		(*tap_iov)[UDP_IOV_ETH] = IOV_OF_LVALUE(udp4_eth_hdr);
+		tap_hdr_update(&bm->taph, l4len + sizeof(bm->ip4h) + ETH_HLEN);
+		eh->h_proto = htons_constant(ETH_P_IP);
 		(*tap_iov)[UDP_IOV_IP] = IOV_OF_LVALUE(bm->ip4h);
 	}
 	(*tap_iov)[UDP_IOV_PAYLOAD].iov_len = l4len;
-- 
@@ -133,11 +133,8 @@ static int udp_splice_init[IP_VERSIONS][NUM_PORTS];
 /* UDP header and data for inbound messages */
 static struct udp_payload_t udp_payload[UDP_MAX_FRAMES];
 
-/* Ethernet header for IPv4 frames */
-static struct ethhdr udp4_eth_hdr;
-
-/* Ethernet header for IPv6 frames */
-static struct ethhdr udp6_eth_hdr;
+/* Ethernet headers for IPv4 and IPv6 frames */
+static struct ethhdr udp_eth_hdr[UDP_MAX_FRAMES];
 
 /**
  * struct udp_meta_t - Pre-cooked headers for UDP packets
@@ -214,8 +211,10 @@ void udp_portmap_clear(void)
  */
 void udp_update_l2_buf(const unsigned char *eth_d, const unsigned char *eth_s)
 {
-	eth_update_mac(&udp4_eth_hdr, eth_d, eth_s);
-	eth_update_mac(&udp6_eth_hdr, eth_d, eth_s);
+	int i;
+
+	for (i = 0; i < UDP_MAX_FRAMES; i++)
+		eth_update_mac(&udp_eth_hdr[i], eth_d, eth_s);
 }
 
 /**
@@ -238,6 +237,7 @@ static void udp_iov_init_one(const struct ctx *c, size_t i)
 
 	*siov = IOV_OF_LVALUE(payload->data);
 
+	tiov[UDP_IOV_ETH] = IOV_OF_LVALUE(udp_eth_hdr[i]);
 	tiov[UDP_IOV_TAP] = tap_hdr_iov(c, &meta->taph);
 	tiov[UDP_IOV_PAYLOAD].iov_base = payload;
 
@@ -253,9 +253,6 @@ static void udp_iov_init(const struct ctx *c)
 {
 	size_t i;
 
-	udp4_eth_hdr.h_proto = htons_constant(ETH_P_IP);
-	udp6_eth_hdr.h_proto = htons_constant(ETH_P_IPV6);
-
 	for (i = 0; i < UDP_MAX_FRAMES; i++)
 		udp_iov_init_one(c, i);
 }
@@ -362,21 +359,21 @@ static void udp_tap_prepare(const struct mmsghdr *mmh,
 	struct iovec (*tap_iov)[UDP_NUM_IOVS] = &udp_l2_iov[idx];
 	struct udp_payload_t *bp = &udp_payload[idx];
 	struct udp_meta_t *bm = &udp_meta[idx];
+	struct ethhdr *eh = (*tap_iov)[UDP_IOV_ETH].iov_base;
 	size_t l4len;
 
+	eth_update_mac(eh, 0, toside->mac);
 	if (!inany_v4(&toside->eaddr) || !inany_v4(&toside->oaddr)) {
 		l4len = udp_update_hdr6(&bm->ip6h, bp, toside,
 					mmh[idx].msg_len, no_udp_csum);
-		tap_hdr_update(&bm->taph, l4len + sizeof(bm->ip6h) +
-			       sizeof(udp6_eth_hdr));
-		(*tap_iov)[UDP_IOV_ETH] = IOV_OF_LVALUE(udp6_eth_hdr);
+		tap_hdr_update(&bm->taph, l4len + sizeof(bm->ip6h) + ETH_HLEN);
+		eh->h_proto = htons_constant(ETH_P_IPV6);
 		(*tap_iov)[UDP_IOV_IP] = IOV_OF_LVALUE(bm->ip6h);
 	} else {
 		l4len = udp_update_hdr4(&bm->ip4h, bp, toside,
 					mmh[idx].msg_len, no_udp_csum);
-		tap_hdr_update(&bm->taph, l4len + sizeof(bm->ip4h) +
-			       sizeof(udp4_eth_hdr));
-		(*tap_iov)[UDP_IOV_ETH] = IOV_OF_LVALUE(udp4_eth_hdr);
+		tap_hdr_update(&bm->taph, l4len + sizeof(bm->ip4h) + ETH_HLEN);
+		eh->h_proto = htons_constant(ETH_P_IP);
 		(*tap_iov)[UDP_IOV_IP] = IOV_OF_LVALUE(bm->ip4h);
 	}
 	(*tap_iov)[UDP_IOV_PAYLOAD].iov_len = l4len;
-- 
2.48.1


^ permalink raw reply related	[flat|nested] 18+ messages in thread

* [PATCH v2 5/8] tcp: forward external source mac address through tap interface
  2025-06-12  4:21 [PATCH v2 0/8] use true mac address of LAN local remote hosts Jon Maloy
                   ` (3 preceding siblings ...)
  2025-06-12  4:21 ` [PATCH v2 4/8] udp: forward external source mac address through tap interface Jon Maloy
@ 2025-06-12  4:21 ` Jon Maloy
  2025-06-12 15:17   ` Stefano Brivio
  2025-06-12  4:21 ` [PATCH v2 6/8] tap: change signature of function tap_push_l2h() Jon Maloy
                   ` (2 subsequent siblings)
  7 siblings, 1 reply; 18+ messages in thread
From: Jon Maloy @ 2025-06-12  4:21 UTC (permalink / raw)
  To: sbrivio, dgibson, jmaloy, passt-dev

We forward the incoming mac address through the tap interface when
receiving incoming packets from network local hosts. Packets from
the local host are excepted from this rule, and are still forwarded
with the default passt/pasta mac address as source.

This is a part of the solution to bug #120

Signed-off-by: Jon Maloy <jmaloy@redhat.com>
---
 tcp.c          |  5 ++++-
 tcp_buf.c      | 27 +++++++++++++--------------
 tcp_internal.h |  2 +-
 tcp_vu.c       |  5 ++---
 4 files changed, 20 insertions(+), 19 deletions(-)

diff --git a/tcp.c b/tcp.c
index f43c1e2..777f345 100644
--- a/tcp.c
+++ b/tcp.c
@@ -920,7 +920,7 @@ static void tcp_fill_header(struct tcphdr *th,
  * @no_tcp_csum:	Do not set TCP checksum
  */
 void tcp_fill_headers(const struct tcp_tap_conn *conn,
-		      struct tap_hdr *taph,
+		      struct tap_hdr *taph, struct ethhdr *eh,
 		      struct iphdr *ip4h, struct ipv6hdr *ip6h,
 		      struct tcphdr *th, struct iov_tail *payload,
 		      const uint16_t *ip4_check, uint32_t seq, bool no_tcp_csum)
@@ -952,6 +952,7 @@ void tcp_fill_headers(const struct tcp_tap_conn *conn,
 			psum = proto_ipv4_header_psum(l4len, IPPROTO_TCP,
 						      *src4, *dst4);
 		}
+		eh->h_proto = htons_constant(ETH_P_IP);
 	}
 
 	if (ip6h) {
@@ -972,7 +973,9 @@ void tcp_fill_headers(const struct tcp_tap_conn *conn,
 						      &ip6h->saddr,
 						      &ip6h->daddr);
 		}
+		eh->h_proto = htons_constant(ETH_P_IPV6);
 	}
+	eth_update_mac(eh, 0, tapside->mac);
 
 	tcp_fill_header(th, conn, seq);
 
diff --git a/tcp_buf.c b/tcp_buf.c
index d1fca67..2dbeca2 100644
--- a/tcp_buf.c
+++ b/tcp_buf.c
@@ -40,8 +40,7 @@
 /* Static buffers */
 
 /* Ethernet header for IPv4 and IPv6 frames */
-static struct ethhdr		tcp4_eth_src;
-static struct ethhdr		tcp6_eth_src;
+static struct ethhdr		tcp_eth_hdr[TCP_FRAMES_MEM];
 
 static struct tap_hdr		tcp_payload_tap_hdr[TCP_FRAMES_MEM];
 
@@ -71,8 +70,10 @@ static struct iovec	tcp_l2_iov[TCP_FRAMES_MEM][TCP_NUM_IOVS];
  */
 void tcp_update_l2_buf(const unsigned char *eth_d, const unsigned char *eth_s)
 {
-	eth_update_mac(&tcp4_eth_src, eth_d, eth_s);
-	eth_update_mac(&tcp6_eth_src, eth_d, eth_s);
+	int i;
+
+	for (i = 0; i < TCP_FRAMES_MEM; i ++)
+		eth_update_mac(&tcp_eth_hdr[i], eth_d, eth_s);
 }
 
 /**
@@ -85,8 +86,8 @@ void tcp_sock_iov_init(const struct ctx *c)
 	struct iphdr iph = L2_BUF_IP4_INIT(IPPROTO_TCP);
 	int i;
 
-	tcp6_eth_src.h_proto = htons_constant(ETH_P_IPV6);
-	tcp4_eth_src.h_proto = htons_constant(ETH_P_IP);
+	for (i = 0; i < TCP_FRAMES_MEM; i ++)
+		eth_update_mac(&tcp_eth_hdr[i], NULL, c->our_tap_mac);
 
 	for (i = 0; i < ARRAY_SIZE(tcp_payload); i++) {
 		tcp6_payload_ip[i] = ip6;
@@ -164,6 +165,7 @@ static void tcp_l2_buf_fill_headers(const struct tcp_tap_conn *conn,
 	struct tap_hdr *taph = iov[TCP_IOV_TAP].iov_base;
 	const struct flowside *tapside = TAPFLOW(conn);
 	const struct in_addr *a4 = inany_v4(&tapside->oaddr);
+	struct ethhdr *eh = iov[TCP_IOV_ETH].iov_base;
 	struct ipv6hdr *ip6h = NULL;
 	struct iphdr *ip4h = NULL;
 
@@ -172,7 +174,7 @@ static void tcp_l2_buf_fill_headers(const struct tcp_tap_conn *conn,
 	else
 		ip6h = iov[TCP_IOV_IP].iov_base;
 
-	tcp_fill_headers(conn, taph, ip4h, ip6h, th, &tail,
+	tcp_fill_headers(conn, taph, eh, ip4h, ip6h, th, &tail,
 			 check, seq, no_tcp_csum);
 }
 
@@ -194,14 +196,12 @@ int tcp_buf_send_flag(const struct ctx *c, struct tcp_tap_conn *conn, int flags)
 	int ret;
 
 	iov = tcp_l2_iov[tcp_payload_used];
-	if (CONN_V4(conn)) {
+	if (CONN_V4(conn))
 		iov[TCP_IOV_IP] = IOV_OF_LVALUE(tcp4_payload_ip[tcp_payload_used]);
-		iov[TCP_IOV_ETH].iov_base = &tcp4_eth_src;
-	} else {
+	else
 		iov[TCP_IOV_IP] = IOV_OF_LVALUE(tcp6_payload_ip[tcp_payload_used]);
-		iov[TCP_IOV_ETH].iov_base = &tcp6_eth_src;
-	}
 
+	iov[TCP_IOV_ETH] = IOV_OF_LVALUE(tcp_eth_hdr[tcp_payload_used]);
 	payload = iov[TCP_IOV_PAYLOAD].iov_base;
 	seq = conn->seq_to_tap;
 	ret = tcp_prepare_flags(c, conn, flags, &payload->th,
@@ -259,11 +259,10 @@ static void tcp_data_to_tap(const struct ctx *c, struct tcp_tap_conn *conn,
 			check = &iph->check;
 		}
 		iov[TCP_IOV_IP] = IOV_OF_LVALUE(tcp4_payload_ip[tcp_payload_used]);
-		iov[TCP_IOV_ETH].iov_base = &tcp4_eth_src;
 	} else if (CONN_V6(conn)) {
 		iov[TCP_IOV_IP] = IOV_OF_LVALUE(tcp6_payload_ip[tcp_payload_used]);
-		iov[TCP_IOV_ETH].iov_base = &tcp6_eth_src;
 	}
+	iov[TCP_IOV_ETH].iov_base = &tcp_eth_hdr[tcp_payload_used];
 	payload = iov[TCP_IOV_PAYLOAD].iov_base;
 	payload->th.th_off = sizeof(struct tcphdr) / 4;
 	payload->th.th_x2 = 0;
diff --git a/tcp_internal.h b/tcp_internal.h
index 36c6533..6c2d1ef 100644
--- a/tcp_internal.h
+++ b/tcp_internal.h
@@ -167,7 +167,7 @@ void tcp_rst_do(const struct ctx *c, struct tcp_tap_conn *conn);
 struct tcp_info_linux;
 
 void tcp_fill_headers(const struct tcp_tap_conn *conn,
-		      struct tap_hdr *taph,
+		      struct tap_hdr *taph, struct ethhdr *eh,
 		      struct iphdr *ip4h, struct ipv6hdr *ip6h,
 		      struct tcphdr *th, struct iov_tail *payload,
 		      const uint16_t *ip4_check, uint32_t seq, bool no_tcp_csum);
diff --git a/tcp_vu.c b/tcp_vu.c
index f3914c7..da1fb37 100644
--- a/tcp_vu.c
+++ b/tcp_vu.c
@@ -135,7 +135,7 @@ int tcp_vu_send_flag(const struct ctx *c, struct tcp_tap_conn *conn, int flags)
 	flags_elem[0].in_sg[0].iov_len = hdrlen + optlen;
 	payload = IOV_TAIL(flags_elem[0].in_sg, 1, hdrlen);
 
-	tcp_fill_headers(conn, NULL, ip4h, ip6h, th, &payload,
+	tcp_fill_headers(conn, NULL, eh, ip4h, ip6h, th, &payload,
 			 NULL, seq, !*c->pcap);
 
 	if (*c->pcap) {
@@ -315,7 +315,6 @@ static void tcp_vu_prepare(const struct ctx *c, struct tcp_tap_conn *conn,
 	eh = vu_eth(base);
 
 	memcpy(eh->h_dest, c->guest_mac, sizeof(eh->h_dest));
-	memcpy(eh->h_source, c->our_tap_mac, sizeof(eh->h_source));
 
 	/* initialize header */
 
@@ -339,7 +338,7 @@ static void tcp_vu_prepare(const struct ctx *c, struct tcp_tap_conn *conn,
 	th->ack = 1;
 	th->psh = push;
 
-	tcp_fill_headers(conn, NULL, ip4h, ip6h, th, &payload,
+	tcp_fill_headers(conn, NULL, eh, ip4h, ip6h, th, &payload,
 			 *check, conn->seq_to_tap, no_tcp_csum);
 	if (ip4h)
 		*check = &ip4h->check;
-- 
@@ -135,7 +135,7 @@ int tcp_vu_send_flag(const struct ctx *c, struct tcp_tap_conn *conn, int flags)
 	flags_elem[0].in_sg[0].iov_len = hdrlen + optlen;
 	payload = IOV_TAIL(flags_elem[0].in_sg, 1, hdrlen);
 
-	tcp_fill_headers(conn, NULL, ip4h, ip6h, th, &payload,
+	tcp_fill_headers(conn, NULL, eh, ip4h, ip6h, th, &payload,
 			 NULL, seq, !*c->pcap);
 
 	if (*c->pcap) {
@@ -315,7 +315,6 @@ static void tcp_vu_prepare(const struct ctx *c, struct tcp_tap_conn *conn,
 	eh = vu_eth(base);
 
 	memcpy(eh->h_dest, c->guest_mac, sizeof(eh->h_dest));
-	memcpy(eh->h_source, c->our_tap_mac, sizeof(eh->h_source));
 
 	/* initialize header */
 
@@ -339,7 +338,7 @@ static void tcp_vu_prepare(const struct ctx *c, struct tcp_tap_conn *conn,
 	th->ack = 1;
 	th->psh = push;
 
-	tcp_fill_headers(conn, NULL, ip4h, ip6h, th, &payload,
+	tcp_fill_headers(conn, NULL, eh, ip4h, ip6h, th, &payload,
 			 *check, conn->seq_to_tap, no_tcp_csum);
 	if (ip4h)
 		*check = &ip4h->check;
-- 
2.48.1


^ permalink raw reply related	[flat|nested] 18+ messages in thread

* [PATCH v2 6/8] tap: change signature of function tap_push_l2h()
  2025-06-12  4:21 [PATCH v2 0/8] use true mac address of LAN local remote hosts Jon Maloy
                   ` (4 preceding siblings ...)
  2025-06-12  4:21 ` [PATCH v2 5/8] tcp: " Jon Maloy
@ 2025-06-12  4:21 ` Jon Maloy
  2025-06-12 15:17   ` Stefano Brivio
  2025-06-12  4:21 ` [PATCH v2 7/8] tcp: make tcp_rst_no_conn() respond with correct mac address Jon Maloy
  2025-06-12  4:21 ` [PATCH v2 8/8] icmp: let icmp use mac address from flowside structure Jon Maloy
  7 siblings, 1 reply; 18+ messages in thread
From: Jon Maloy @ 2025-06-12  4:21 UTC (permalink / raw)
  To: sbrivio, dgibson, jmaloy, passt-dev

In the following commits it must be possible for the callers of
funtion tap_push_l2h() to specify which source mac address should
be added to the ethernet header sent over the tap interface. As
a preparation, we now add a new argument to that function, still
without actually using it.

Signed-off-by: Jon Maloy <jmaloy@redhat.com>
---
 tap.c | 18 +++++++++++-------
 tap.h |  3 ++-
 tcp.c |  4 ++--
 3 files changed, 15 insertions(+), 10 deletions(-)

diff --git a/tap.c b/tap.c
index 6db5d88..634c012 100644
--- a/tap.c
+++ b/tap.c
@@ -171,17 +171,21 @@ const struct in6_addr *tap_ip6_daddr(const struct ctx *c,
  * tap_push_l2h() - Build an L2 header for an inbound packet
  * @c:		Execution context
  * @buf:	Buffer address at which to generate header
+ * @src_mac:	MAC address to be used as source for message
  * @proto:	Ethernet protocol number for L3
  *
  * Return: pointer at which to write the packet's payload
  */
-void *tap_push_l2h(const struct ctx *c, void *buf, uint16_t proto)
+void *tap_push_l2h(const struct ctx *c, void *buf,
+		   const void *src_mac, uint16_t proto)
 {
 	struct ethhdr *eh = (struct ethhdr *)buf;
 
-	/* TODO: ARP table lookup */
 	memcpy(eh->h_dest, c->guest_mac, ETH_ALEN);
-	memcpy(eh->h_source, c->our_tap_mac, ETH_ALEN);
+	if (src_mac)
+		memcpy(eh->h_source, src_mac, ETH_ALEN);
+	else
+		memcpy(eh->h_source, c->our_tap_mac, ETH_ALEN);
 	eh->h_proto = ntohs(proto);
 	return eh + 1;
 }
@@ -261,7 +265,7 @@ void tap_udp4_send(const struct ctx *c, struct in_addr src, in_port_t sport,
 {
 	size_t l4len = dlen + sizeof(struct udphdr);
 	char buf[USHRT_MAX];
-	struct iphdr *ip4h = tap_push_l2h(c, buf, ETH_P_IP);
+	struct iphdr *ip4h = tap_push_l2h(c, buf, NULL, ETH_P_IP);
 	struct udphdr *uh = tap_push_ip4h(ip4h, src, dst, l4len, IPPROTO_UDP);
 	char *data = tap_push_uh4(uh, src, sport, dst, dport, in, dlen);
 
@@ -281,7 +285,7 @@ void tap_icmp4_send(const struct ctx *c, struct in_addr src, struct in_addr dst,
 		    const void *in, size_t l4len)
 {
 	char buf[USHRT_MAX];
-	struct iphdr *ip4h = tap_push_l2h(c, buf, ETH_P_IP);
+	struct iphdr *ip4h = tap_push_l2h(c, buf, NULL, ETH_P_IP);
 	struct icmphdr *icmp4h = tap_push_ip4h(ip4h, src, dst,
 					       l4len, IPPROTO_ICMP);
 
@@ -367,7 +371,7 @@ void tap_udp6_send(const struct ctx *c,
 {
 	size_t l4len = dlen + sizeof(struct udphdr);
 	char buf[USHRT_MAX];
-	struct ipv6hdr *ip6h = tap_push_l2h(c, buf, ETH_P_IPV6);
+	struct ipv6hdr *ip6h = tap_push_l2h(c, buf, NULL, ETH_P_IPV6);
 	struct udphdr *uh = tap_push_ip6h(ip6h, src, dst,
 					  l4len, IPPROTO_UDP, flow);
 	char *data = tap_push_uh6(uh, src, sport, dst, dport, in, dlen);
@@ -389,7 +393,7 @@ void tap_icmp6_send(const struct ctx *c,
 		    const void *in, size_t l4len)
 {
 	char buf[USHRT_MAX];
-	struct ipv6hdr *ip6h = tap_push_l2h(c, buf, ETH_P_IPV6);
+	struct ipv6hdr *ip6h = tap_push_l2h(c, buf, NULL, ETH_P_IPV6);
 	struct icmp6hdr *icmp6h = tap_push_ip6h(ip6h, src, dst, l4len,
 						IPPROTO_ICMPV6, 0);
 
diff --git a/tap.h b/tap.h
index 6fe3d15..e640d95 100644
--- a/tap.h
+++ b/tap.h
@@ -70,7 +70,8 @@ static inline void tap_hdr_update(struct tap_hdr *thdr, size_t l2len)
 }
 
 unsigned long tap_l2_max_len(const struct ctx *c);
-void *tap_push_l2h(const struct ctx *c, void *buf, uint16_t proto);
+void *tap_push_l2h(const struct ctx *c, void *buf,
+		   const void *src_mac, uint16_t proto);
 void *tap_push_ip4h(struct iphdr *ip4h, struct in_addr src,
 		     struct in_addr dst, size_t l4len, uint8_t proto);
 void *tap_push_uh4(struct udphdr *uh, struct in_addr src, in_port_t sport,
diff --git a/tcp.c b/tcp.c
index 777f345..1a32424 100644
--- a/tcp.c
+++ b/tcp.c
@@ -1898,7 +1898,7 @@ static void tcp_rst_no_conn(const struct ctx *c, int af,
 		return;
 
 	if (af == AF_INET) {
-		struct iphdr *ip4h = tap_push_l2h(c, buf, ETH_P_IP);
+		struct iphdr *ip4h = tap_push_l2h(c, buf, NULL, ETH_P_IP);
 		const struct in_addr *rst_src = daddr;
 		const struct in_addr *rst_dst = saddr;
 
@@ -1908,7 +1908,7 @@ static void tcp_rst_no_conn(const struct ctx *c, int af,
 					      *rst_src, *rst_dst);
 
 	} else {
-		struct ipv6hdr *ip6h = tap_push_l2h(c, buf, ETH_P_IPV6);
+		struct ipv6hdr *ip6h = tap_push_l2h(c, buf, NULL, ETH_P_IPV6);
 		const struct in6_addr *rst_src = daddr;
 		const struct in6_addr *rst_dst = saddr;
 
-- 
@@ -1898,7 +1898,7 @@ static void tcp_rst_no_conn(const struct ctx *c, int af,
 		return;
 
 	if (af == AF_INET) {
-		struct iphdr *ip4h = tap_push_l2h(c, buf, ETH_P_IP);
+		struct iphdr *ip4h = tap_push_l2h(c, buf, NULL, ETH_P_IP);
 		const struct in_addr *rst_src = daddr;
 		const struct in_addr *rst_dst = saddr;
 
@@ -1908,7 +1908,7 @@ static void tcp_rst_no_conn(const struct ctx *c, int af,
 					      *rst_src, *rst_dst);
 
 	} else {
-		struct ipv6hdr *ip6h = tap_push_l2h(c, buf, ETH_P_IPV6);
+		struct ipv6hdr *ip6h = tap_push_l2h(c, buf, NULL, ETH_P_IPV6);
 		const struct in6_addr *rst_src = daddr;
 		const struct in6_addr *rst_dst = saddr;
 
-- 
2.48.1


^ permalink raw reply related	[flat|nested] 18+ messages in thread

* [PATCH v2 7/8] tcp: make tcp_rst_no_conn() respond with correct mac address
  2025-06-12  4:21 [PATCH v2 0/8] use true mac address of LAN local remote hosts Jon Maloy
                   ` (5 preceding siblings ...)
  2025-06-12  4:21 ` [PATCH v2 6/8] tap: change signature of function tap_push_l2h() Jon Maloy
@ 2025-06-12  4:21 ` Jon Maloy
  2025-06-12 15:17   ` Stefano Brivio
  2025-06-12  4:21 ` [PATCH v2 8/8] icmp: let icmp use mac address from flowside structure Jon Maloy
  7 siblings, 1 reply; 18+ messages in thread
From: Jon Maloy @ 2025-06-12  4:21 UTC (permalink / raw)
  To: sbrivio, dgibson, jmaloy, passt-dev

tcp_rst_no_conn() needs to identify and specify which source mac
address to use when sending an RST to the guest. This is because
it doesn't have access to any flow structure where this address
could be fetched.

Signed-off-by: Jon Maloy <jmaloy@redhat.com>
---
 tcp.c | 15 +++++++++++++--
 1 file changed, 13 insertions(+), 2 deletions(-)

diff --git a/tcp.c b/tcp.c
index 1a32424..b49f603 100644
--- a/tcp.c
+++ b/tcp.c
@@ -309,6 +309,7 @@
 #include "tcp_internal.h"
 #include "tcp_buf.h"
 #include "tcp_vu.h"
+#include "netlink.h"
 
 #ifndef __USE_MISC
 /* From Linux UAPI, missing in netinet/tcp.h provided by musl */
@@ -1888,6 +1889,9 @@ static void tcp_rst_no_conn(const struct ctx *c, int af,
 			    const struct tcphdr *th, size_t l4len)
 {
 	struct iov_tail payload = IOV_TAIL(NULL, 0, 0);
+	unsigned char src_mac[ETH_ALEN];
+	union inany_addr translated;
+	union inany_addr dst;
 	struct tcphdr *rsth;
 	char buf[USHRT_MAX];
 	uint32_t psum = 0;
@@ -1897,8 +1901,15 @@ static void tcp_rst_no_conn(const struct ctx *c, int af,
 	if (th->rst)
 		return;
 
+	/* If remote host on local network - respond with its mac address */
+	memcpy(src_mac, c->our_tap_mac, ETH_ALEN);
+	inany_from_af(&dst, af, daddr);
+	nat_outbound(c, &dst, &translated);
+	if (!memcmp(&dst, &translated, sizeof(dst)))
+		nl_mac_get(nl_sock, &dst, src_mac);
+
 	if (af == AF_INET) {
-		struct iphdr *ip4h = tap_push_l2h(c, buf, NULL, ETH_P_IP);
+		struct iphdr *ip4h = tap_push_l2h(c, buf, src_mac, ETH_P_IP);
 		const struct in_addr *rst_src = daddr;
 		const struct in_addr *rst_dst = saddr;
 
@@ -1908,7 +1919,7 @@ static void tcp_rst_no_conn(const struct ctx *c, int af,
 					      *rst_src, *rst_dst);
 
 	} else {
-		struct ipv6hdr *ip6h = tap_push_l2h(c, buf, NULL, ETH_P_IPV6);
+		struct ipv6hdr *ip6h = tap_push_l2h(c, buf, src_mac, ETH_P_IPV6);
 		const struct in6_addr *rst_src = daddr;
 		const struct in6_addr *rst_dst = saddr;
 
-- 
@@ -309,6 +309,7 @@
 #include "tcp_internal.h"
 #include "tcp_buf.h"
 #include "tcp_vu.h"
+#include "netlink.h"
 
 #ifndef __USE_MISC
 /* From Linux UAPI, missing in netinet/tcp.h provided by musl */
@@ -1888,6 +1889,9 @@ static void tcp_rst_no_conn(const struct ctx *c, int af,
 			    const struct tcphdr *th, size_t l4len)
 {
 	struct iov_tail payload = IOV_TAIL(NULL, 0, 0);
+	unsigned char src_mac[ETH_ALEN];
+	union inany_addr translated;
+	union inany_addr dst;
 	struct tcphdr *rsth;
 	char buf[USHRT_MAX];
 	uint32_t psum = 0;
@@ -1897,8 +1901,15 @@ static void tcp_rst_no_conn(const struct ctx *c, int af,
 	if (th->rst)
 		return;
 
+	/* If remote host on local network - respond with its mac address */
+	memcpy(src_mac, c->our_tap_mac, ETH_ALEN);
+	inany_from_af(&dst, af, daddr);
+	nat_outbound(c, &dst, &translated);
+	if (!memcmp(&dst, &translated, sizeof(dst)))
+		nl_mac_get(nl_sock, &dst, src_mac);
+
 	if (af == AF_INET) {
-		struct iphdr *ip4h = tap_push_l2h(c, buf, NULL, ETH_P_IP);
+		struct iphdr *ip4h = tap_push_l2h(c, buf, src_mac, ETH_P_IP);
 		const struct in_addr *rst_src = daddr;
 		const struct in_addr *rst_dst = saddr;
 
@@ -1908,7 +1919,7 @@ static void tcp_rst_no_conn(const struct ctx *c, int af,
 					      *rst_src, *rst_dst);
 
 	} else {
-		struct ipv6hdr *ip6h = tap_push_l2h(c, buf, NULL, ETH_P_IPV6);
+		struct ipv6hdr *ip6h = tap_push_l2h(c, buf, src_mac, ETH_P_IPV6);
 		const struct in6_addr *rst_src = daddr;
 		const struct in6_addr *rst_dst = saddr;
 
-- 
2.48.1


^ permalink raw reply related	[flat|nested] 18+ messages in thread

* [PATCH v2 8/8] icmp: let icmp use mac address from flowside structure
  2025-06-12  4:21 [PATCH v2 0/8] use true mac address of LAN local remote hosts Jon Maloy
                   ` (6 preceding siblings ...)
  2025-06-12  4:21 ` [PATCH v2 7/8] tcp: make tcp_rst_no_conn() respond with correct mac address Jon Maloy
@ 2025-06-12  4:21 ` Jon Maloy
  7 siblings, 0 replies; 18+ messages in thread
From: Jon Maloy @ 2025-06-12  4:21 UTC (permalink / raw)
  To: sbrivio, dgibson, jmaloy, passt-dev

Even ICMP needs to be updated to use the flowside mac address instead
of just the own tap address. We do that here.

Signed-off-by: Jon Maloy <jmaloy@redhat.com>
---
 icmp.c |  4 ++--
 ndp.c  |  2 +-
 tap.c  | 10 ++++++----
 tap.h  |  4 ++--
 udp.c  |  4 ++--
 5 files changed, 13 insertions(+), 11 deletions(-)

diff --git a/icmp.c b/icmp.c
index 7e2b342..da85937 100644
--- a/icmp.c
+++ b/icmp.c
@@ -129,12 +129,12 @@ void icmp_sock_handler(const struct ctx *c, union epoll_ref ref)
 		const struct in_addr *daddr = inany_v4(&ini->eaddr);
 
 		ASSERT(saddr && daddr); /* Must have IPv4 addresses */
-		tap_icmp4_send(c, *saddr, *daddr, buf, n);
+		tap_icmp4_send(c, *saddr, *daddr, buf, ini->mac, n);
 	} else if (pingf->f.type == FLOW_PING6) {
 		const struct in6_addr *saddr = &ini->oaddr.a6;
 		const struct in6_addr *daddr = &ini->eaddr.a6;
 
-		tap_icmp6_send(c, saddr, daddr, buf, n);
+		tap_icmp6_send(c, saddr, daddr, buf, ini->mac, n);
 	}
 	return;
 
diff --git a/ndp.c b/ndp.c
index d702f37..a30d3a0 100644
--- a/ndp.c
+++ b/ndp.c
@@ -185,7 +185,7 @@ static void ndp_send(const struct ctx *c, const struct in6_addr *dst,
 {
 	const struct in6_addr *src = &c->ip6.our_tap_ll;
 
-	tap_icmp6_send(c, src, dst, buf, l4len);
+	tap_icmp6_send(c, src, dst, buf, c->our_tap_mac, l4len);
 }
 
 /**
diff --git a/tap.c b/tap.c
index 634c012..1d8963a 100644
--- a/tap.c
+++ b/tap.c
@@ -279,13 +279,14 @@ void tap_udp4_send(const struct ctx *c, struct in_addr src, in_port_t sport,
  * @src:	IPv4 source address
  * @dst:	IPv4 destination address
  * @in:		ICMP packet, including ICMP header
+ * @src_mac:	MAC address to be used as source for message
  * @l4len:	ICMP packet length, including ICMP header
  */
 void tap_icmp4_send(const struct ctx *c, struct in_addr src, struct in_addr dst,
-		    const void *in, size_t l4len)
+		    const void *in, const void *src_mac, size_t l4len)
 {
 	char buf[USHRT_MAX];
-	struct iphdr *ip4h = tap_push_l2h(c, buf, NULL, ETH_P_IP);
+	struct iphdr *ip4h = tap_push_l2h(c, buf, src_mac, ETH_P_IP);
 	struct icmphdr *icmp4h = tap_push_ip4h(ip4h, src, dst,
 					       l4len, IPPROTO_ICMP);
 
@@ -386,14 +387,15 @@ void tap_udp6_send(const struct ctx *c,
  * @src:	IPv6 source address
  * @dst:	IPv6 destination address
  * @in:		ICMP packet, including ICMP header
+ * @src_mac:	MAC address to be used as source for message
  * @l4len:	ICMP packet length, including ICMP header
  */
 void tap_icmp6_send(const struct ctx *c,
 		    const struct in6_addr *src, const struct in6_addr *dst,
-		    const void *in, size_t l4len)
+		    const void *in, const void *src_mac, size_t l4len)
 {
 	char buf[USHRT_MAX];
-	struct ipv6hdr *ip6h = tap_push_l2h(c, buf, NULL, ETH_P_IPV6);
+	struct ipv6hdr *ip6h = tap_push_l2h(c, buf, src_mac, ETH_P_IPV6);
 	struct icmp6hdr *icmp6h = tap_push_ip6h(ip6h, src, dst, l4len,
 						IPPROTO_ICMPV6, 0);
 
diff --git a/tap.h b/tap.h
index e640d95..7025eaa 100644
--- a/tap.h
+++ b/tap.h
@@ -91,7 +91,7 @@ void tap_udp4_send(const struct ctx *c, struct in_addr src, in_port_t sport,
 		   struct in_addr dst, in_port_t dport,
 		   const void *in, size_t dlen);
 void tap_icmp4_send(const struct ctx *c, struct in_addr src, struct in_addr dst,
-		    const void *in, size_t l4len);
+		    const void *in, const void *src_mac, size_t l4len);
 const struct in6_addr *tap_ip6_daddr(const struct ctx *c,
 				     const struct in6_addr *src);
 void *tap_push_ip6h(struct ipv6hdr *ip6h,
@@ -103,7 +103,7 @@ void tap_udp6_send(const struct ctx *c,
 		   uint32_t flow, void *in, size_t dlen);
 void tap_icmp6_send(const struct ctx *c,
 		    const struct in6_addr *src, const struct in6_addr *dst,
-		    const void *in, size_t l4len);
+		    const void *in, const void *src_mac, size_t l4len);
 void tap_send_single(const struct ctx *c, const void *data, size_t l2len);
 size_t tap_send_frames(const struct ctx *c, const struct iovec *iov,
 		       size_t bufs_per_frame, size_t nframes);
diff --git a/udp.c b/udp.c
index ae8fbaf..28ec061 100644
--- a/udp.c
+++ b/udp.c
@@ -419,7 +419,7 @@ static void udp_send_tap_icmp4(const struct ctx *c,
 	tap_push_uh4(&msg.uh, eaddr, eport, oaddr, oport, in, dlen);
 	memcpy(&msg.data, in, dlen);
 
-	tap_icmp4_send(c, saddr, eaddr, &msg, msglen);
+	tap_icmp4_send(c, saddr, eaddr, &msg, toside->mac, msglen);
 }
 
 
@@ -464,7 +464,7 @@ static void udp_send_tap_icmp6(const struct ctx *c,
 	tap_push_uh6(&msg.uh, eaddr, eport, oaddr, oport, in, dlen);
 	memcpy(&msg.data, in, dlen);
 
-	tap_icmp6_send(c, saddr, eaddr, &msg, msglen);
+	tap_icmp6_send(c, saddr, eaddr, &msg, toside->mac, msglen);
 }
 
 /**
-- 
@@ -419,7 +419,7 @@ static void udp_send_tap_icmp4(const struct ctx *c,
 	tap_push_uh4(&msg.uh, eaddr, eport, oaddr, oport, in, dlen);
 	memcpy(&msg.data, in, dlen);
 
-	tap_icmp4_send(c, saddr, eaddr, &msg, msglen);
+	tap_icmp4_send(c, saddr, eaddr, &msg, toside->mac, msglen);
 }
 
 
@@ -464,7 +464,7 @@ static void udp_send_tap_icmp6(const struct ctx *c,
 	tap_push_uh6(&msg.uh, eaddr, eport, oaddr, oport, in, dlen);
 	memcpy(&msg.data, in, dlen);
 
-	tap_icmp6_send(c, saddr, eaddr, &msg, msglen);
+	tap_icmp6_send(c, saddr, eaddr, &msg, toside->mac, msglen);
 }
 
 /**
-- 
2.48.1


^ permalink raw reply related	[flat|nested] 18+ messages in thread

* Re: [PATCH v2 1/8] netlink: Add function to extract mac addresses from arp table
  2025-06-12  4:21 ` [PATCH v2 1/8] netlink: Add function to extract mac addresses from arp table Jon Maloy
@ 2025-06-12 15:17   ` Stefano Brivio
  2025-06-13  6:11     ` David Gibson
  0 siblings, 1 reply; 18+ messages in thread
From: Stefano Brivio @ 2025-06-12 15:17 UTC (permalink / raw)
  To: Jon Maloy; +Cc: dgibson, passt-dev

On Thu, 12 Jun 2025 00:21:45 -0400
Jon Maloy <jmaloy@redhat.com> wrote:

> The solution to bug #120 requires the ability to translate from
> an IP address to its corresponding MAC address in cases where
> those are present in the ARP/NTP table.

Nit: NDP.

> We add this feature here.
> 
> Signed-off-by: Jon Maloy <jmaloy@redhat.com>

Nit: David's address is david@gibson.dropbear.id.au (in case you need
to Cc: him specifically).

Nit: while bug #120 isn't fixed at once by one specific patch in this
series, I would rather be liberal with Link: tags, better a few than
nothing, should we ever need to look up commits in the future. That is,

Link: https://bugs.passt.top/show_bug.cgi?id=120

would fit nicely here and in a few other patches, I think.

> ---
>  netlink.c | 58 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
>  netlink.h |  1 +
>  2 files changed, 59 insertions(+)
> 
> diff --git a/netlink.c b/netlink.c
> index ee9325a..6c55c4c 100644
> --- a/netlink.c
> +++ b/netlink.c
> @@ -800,6 +800,64 @@ int nl_addr_get(int s, unsigned int ifi, sa_family_t af,
>  	return status;
>  }
>  
> +/**
> + * nl_mac_get() - Get mac address corresponding to given IP address

Nit: MAC address

> + * @s:		Netlink socket
> + * @addr:	IPv4 or IPv6 address
> + * @mac:	Array to place the returned MAC address
> + *
> + * Return: 0 on success, negative error code on failure

Kind of. As far as I understand, this also returns 0 if the MAC address
is not found. Should we perhaps set @mac to all-zeroes in that case,
and say "0 if found or not in table, ..."?

> + */
> +int nl_mac_get(int s, const union inany_addr *addr, unsigned char *mac)
> +{
> +	struct nlmsghdr *nlh;
> +	char buf[NLBUFSIZ];
> +	const void *ip;

I guess you should be able to use inany_equals() and keep addresses as
inany_addr as much as possible.

> +	ssize_t status;
> +	uint32_t seq;
> +	int alen;
> +	struct {
> +		struct nlmsghdr nlh;
> +		struct ndmsg ndm;
> +	} req;

You can actually ask the kernel to do the filtering for you (as long as
NETLINK_GET_STRICT_CHK is available, >= 4.20), by passing a NDA_DST
attribute in the request. See e.g. 'strace ip neigh get ... dev ...'.

As far as I know, even if the device (ndm_ifindex) is mandatory in
ip-neighbour(8), the kernel doesn't actually need it.

By the way, I'm not sure if we want to give a preference to a
particular interface, in case we have different MAC addresses for the
same IP address on different interfaces.

I would say we don't care and we can just pick one because we don't
always have the inbound interface available anyway, but I haven't
really thought it through.

> +
> +	if (IN6_IS_ADDR_V4MAPPED(&addr->a6)) {
> +		ip = &addr->v4mapped.a4;
> +		alen = sizeof(struct in_addr);
> +		req.ndm.ndm_family = AF_INET;
> +	} else {
> +		ip = &addr->a6;
> +		alen = sizeof(struct in6_addr);
> +		req.ndm.ndm_family = AF_INET6;
> +	}
> +
> +	seq = nl_send(s, &req, RTM_GETNEIGH, NLM_F_DUMP, sizeof(req));
> +	nl_foreach_oftype(nlh, status, s, buf, seq, RTM_NEWNEIGH) {
> +		struct ndmsg *ndm = NLMSG_DATA(nlh);
> +		struct rtattr *attr = (struct rtattr *)(ndm + 1);
> +		int attrlen = nlh->nlmsg_len - NLMSG_LENGTH(sizeof(*ndm));
> +		unsigned char *lladdr = NULL;
> +		void *neigh_ip = NULL;

cppcheck says:

netlink.c:839:18: style: Variable 'lladdr' can be declared as pointer to const [constVariablePointer]
  unsigned char *lladdr = NULL;
                 ^
netlink.c:840:9: style: Variable 'neigh_ip' can be declared as pointer to const [constVariablePointer]
  void *neigh_ip = NULL;
        ^

Try 'make cppcheck' (or running all the tests, that would be even
better).

> +
> +		for (; RTA_OK(attr, attrlen); attr = RTA_NEXT(attr, attrlen)) {
> +			if (attr->rta_type == NDA_DST)
> +				neigh_ip = RTA_DATA(attr);
> +			else if (attr->rta_type == NDA_LLADDR)
> +				lladdr = RTA_DATA(attr);
> +		}
> +
> +		if (!neigh_ip || !lladdr)
> +			continue;
> +
> +		if (!memcmp(neigh_ip, ip, alen)) {

...the filtering is still needed for kernel versions < 4.20 (we also do
it in other functions, such as nl_route_dup()), but in general it
should be unnecessary.

That's important to avoid huge netlink messages and substantial overhead
in case we have a lot of neighbours in the table.

> +			memcpy(mac, lladdr, ETH_ALEN);
> +			return 0;
> +		}
> +	}
> +
> +	return status;
> +}
> +
>  /**
>   * nl_addr_get_ll() - Get first IPv6 link-local address for a given interface
>   * @s:		Netlink socket
> diff --git a/netlink.h b/netlink.h
> index b51e99c..2f674d7 100644
> --- a/netlink.h
> +++ b/netlink.h
> @@ -17,6 +17,7 @@ int nl_route_dup(int s_src, unsigned int ifi_src,
>  		 int s_dst, unsigned int ifi_dst, sa_family_t af);
>  int nl_addr_get(int s, unsigned int ifi, sa_family_t af,
>  		void *addr, int *prefix_len, void *addr_l);
> +int nl_mac_get(int s, const union inany_addr *addr, unsigned char *mac);
>  int nl_addr_set(int s, unsigned int ifi, sa_family_t af,
>  		const void *addr, int prefix_len);
>  int nl_addr_get_ll(int s, unsigned int ifi, struct in6_addr *addr);

-- 
Stefano


^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH v2 2/8] arp: respond with true mac address of LAN local remote hosts
  2025-06-12  4:21 ` [PATCH v2 2/8] arp: respond with true mac address of LAN local remote hosts Jon Maloy
@ 2025-06-12 15:17   ` Stefano Brivio
  0 siblings, 0 replies; 18+ messages in thread
From: Stefano Brivio @ 2025-06-12 15:17 UTC (permalink / raw)
  To: Jon Maloy; +Cc: dgibson, passt-dev

On Thu, 12 Jun 2025 00:21:46 -0400
Jon Maloy <jmaloy@redhat.com> wrote:

> When we receive an ARP or NTP request over the tap interface for a

Nit: NDP.

> host on the local network, we respond with that host's real mac
> address.
> 
> The local host, which is acting as a proxy for the default gateway,
> is still exempted from this rule.
> 
> Signed-off-by: Jon Maloy <jmaloy@redhat.com>
> ---
>  arp.c |  9 +++++++++
>  fwd.c |  2 +-
>  fwd.h |  3 ++-
>  ndp.c | 11 +++++++++++
>  4 files changed, 23 insertions(+), 2 deletions(-)
> 
> diff --git a/arp.c b/arp.c
> index fc482bb..fbd8219 100644
> --- a/arp.c
> +++ b/arp.c
> @@ -29,6 +29,7 @@
>  #include "dhcp.h"
>  #include "passt.h"
>  #include "tap.h"
> +#include "netlink.h"
>  
>  /**
>   * arp() - Check if this is a supported ARP message, reply as needed
> @@ -39,6 +40,8 @@
>   */
>  int arp(const struct ctx *c, const struct pool *p)
>  {
> +	union inany_addr translated;
> +	union inany_addr addr;

Those aren't very descriptive, especially 'translated'. What about
'tgt_ipaddr' and 'tgt_ipaddr_nat'? I'm not fond of those names either
but I can't come up with much better right now.

Nit: that could be a single line.

>  	unsigned char swap[4];
>  	struct ethhdr *eh;
>  	struct arphdr *ah;
> @@ -72,6 +75,12 @@ int arp(const struct ctx *c, const struct pool *p)
>  	memcpy(am->tha,		am->sha,	sizeof(am->tha));
>  	memcpy(am->sha,		c->our_tap_mac,	sizeof(am->sha));
>  
> +	/* If remote host on local network - respond with its mac address */

This is partially a full sentence ("If remote host [...] respond ..."),
partially a schematic form of a correspondence ("Remote host [...]:
respond with ..."). In general we stick to either one, not a mix.

> +	inany_from_af(&addr, AF_INET, am->tip);
> +	nat_outbound(c, &addr, &translated);
> +	if (!memcmp(&addr, &translated, sizeof(addr)))

You can use inany_equals() here as well, for consistency, even though
it's always an IPv4 address.

By the way, this way of checking if the target IP address is a
non-local host on the local segment is a bit convoluted, and perhaps
would deserve its own helper outside of this function.

I also wonder if it's correct in all cases: what happens if this is a
local (non-loopback) address of a host interface? Then we'll call
nl_mac_get() which should leave am->sha as it is (because the MAC
address is not found), but the comment above doesn't match anymore.

> +		nl_mac_get(nl_sock, &addr, am->sha);
> +
>  	memcpy(swap,		am->tip,	sizeof(am->tip));
>  	memcpy(am->tip,		am->sip,	sizeof(am->tip));
>  	memcpy(am->sip,		swap,		sizeof(am->sip));
> diff --git a/fwd.c b/fwd.c
> index 250cf56..02ebc9d 100644
> --- a/fwd.c
> +++ b/fwd.c
> @@ -332,7 +332,7 @@ static bool fwd_guest_accessible(const struct ctx *c,
>   * Only handles translations that depend *only* on the address.  Anything
>   * related to specific ports or flows is handled elsewhere.
>   */
> -static void nat_outbound(const struct ctx *c, const union inany_addr *addr,
> +void nat_outbound(const struct ctx *c, const union inany_addr *addr,
>  			 union inany_addr *translated)
>  {
>  	if (inany_equals4(addr, &c->ip4.map_host_loopback))
> diff --git a/fwd.h b/fwd.h
> index 0458a3c..61632f2 100644
> --- a/fwd.h
> +++ b/fwd.h
> @@ -56,5 +56,6 @@ uint8_t fwd_nat_from_splice(const struct ctx *c, uint8_t proto,
>  			    const struct flowside *ini, struct flowside *tgt);
>  uint8_t fwd_nat_from_host(const struct ctx *c, uint8_t proto,
>  			  const struct flowside *ini, struct flowside *tgt);
> -
> +void nat_outbound(const struct ctx *c, const union inany_addr *addr,
> +		  union inany_addr *translated);
>  #endif /* FWD_H */
> diff --git a/ndp.c b/ndp.c
> index 3e15494..d702f37 100644
> --- a/ndp.c
> +++ b/ndp.c
> @@ -32,6 +32,7 @@
>  #include "passt.h"
>  #include "tap.h"
>  #include "log.h"
> +#include "netlink.h"
>  
>  #define	RT_LIFETIME	65535
>  
> @@ -196,6 +197,9 @@ static void ndp_send(const struct ctx *c, const struct in6_addr *dst,
>  static void ndp_na(const struct ctx *c, const struct in6_addr *dst,
>  	    const struct in6_addr *addr)
>  {
> +	union inany_addr translated;
> +	union inany_addr addr_any;
> +
>  	struct ndp_na na = {
>  		.ih = {
>  			.icmp6_type		= NA,
> @@ -215,6 +219,13 @@ static void ndp_na(const struct ctx *c, const struct in6_addr *dst,
>  
>  	memcpy(na.target_l2_addr.mac, c->our_tap_mac, ETH_ALEN);
>  
> +	/* If remote host on local network - respond with its mac address */

In IPv6 terms this is typically called link-layer address.

> +	inany_from_af(&addr_any, AF_INET6, addr);
> +	nat_outbound(c, &addr_any, &translated);
> +
> +	if (!memcmp(&addr_any, &translated, sizeof(addr_any)))
> +		nl_mac_get(nl_sock, &addr_any, na.target_l2_addr.mac);
> +

Same as for arp().

>  	ndp_send(c, dst, &na, sizeof(na));
>  }
>  

-- 
Stefano


^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH v2 3/8] flow: add mac address of LAN local remote hosts to flow
  2025-06-12  4:21 ` [PATCH v2 3/8] flow: add mac address of LAN local remote hosts to flow Jon Maloy
@ 2025-06-12 15:17   ` Stefano Brivio
  2025-06-14 13:22     ` David Gibson
  0 siblings, 1 reply; 18+ messages in thread
From: Stefano Brivio @ 2025-06-12 15:17 UTC (permalink / raw)
  To: Jon Maloy; +Cc: dgibson, passt-dev

On Thu, 12 Jun 2025 00:21:47 -0400
Jon Maloy <jmaloy@redhat.com> wrote:

> When communicating with remote hosts on the local network, some guest
> applications want to see the real mac address of that host instead
> of passt/pasta's own tap address. The flowside structure is a convenient
> location for storing that address, so we do that in this commit.
> 
> Note that we don´t add usage of this address in this commit, - that
> will come in later commits.
> 
> Signed-off-by: Jon Maloy <jmaloy@redhat.com>
> ---
>  flow.c | 13 ++++++++++++-
>  flow.h |  2 ++
>  2 files changed, 14 insertions(+), 1 deletion(-)
> 
> diff --git a/flow.c b/flow.c
> index da5c813..fffc817 100644
> --- a/flow.c
> +++ b/flow.c
> @@ -20,6 +20,7 @@
>  #include "flow.h"
>  #include "flow_table.h"
>  #include "repair.h"
> +#include "netlink.h"
>  
>  const char *flow_state_str[] = {
>  	[FLOW_STATE_FREE]	= "FREE",
> @@ -438,7 +439,7 @@ struct flowside *flow_target(const struct ctx *c, union flow *flow,
>  {
>  	char estr[INANY_ADDRSTRLEN], fstr[INANY_ADDRSTRLEN];
>  	struct flow_common *f = &flow->f;
> -	const struct flowside *ini = &f->side[INISIDE];
> +	struct flowside *ini = &f->side[INISIDE];
>  	struct flowside *tgt = &f->side[TGTSIDE];
>  	uint8_t tgtpif = PIF_NONE;
>  
> @@ -446,10 +447,16 @@ struct flowside *flow_target(const struct ctx *c, union flow *flow,
>  	ASSERT(f->type == FLOW_TYPE_NONE);
>  	ASSERT(f->pif[INISIDE] != PIF_NONE && f->pif[TGTSIDE] == PIF_NONE);
>  	ASSERT(flow->f.state == FLOW_STATE_INI);
> +	memcpy(ini->mac, c->our_tap_mac, ETH_ALEN);
> +	memcpy(tgt->mac, c->our_tap_mac, ETH_ALEN);
>  
>  	switch (f->pif[INISIDE]) {
>  	case PIF_TAP:
>  		tgtpif = fwd_nat_from_tap(c, proto, ini, tgt);
> +
> +		/* If remote host on local network - insert its mac address */
> +		if (!memcmp(&tgt->eaddr, &ini->oaddr, sizeof(ini->oaddr)))
> +			nl_mac_get(nl_sock, &ini->oaddr, ini->mac);
>  		break;
>  
>  	case PIF_SPLICE:
> @@ -458,6 +465,10 @@ struct flowside *flow_target(const struct ctx *c, union flow *flow,
>  
>  	case PIF_HOST:
>  		tgtpif = fwd_nat_from_host(c, proto, ini, tgt);
> +
> +		/* If remote host on local network - insert its mac address */
> +		if (!memcmp(&tgt->oaddr, &ini->eaddr, sizeof(ini->eaddr)))
> +			nl_mac_get(nl_sock, &tgt->oaddr, tgt->mac);
>  		break;
>  
>  	default:
> diff --git a/flow.h b/flow.h
> index cac618a..916951b 100644
> --- a/flow.h
> +++ b/flow.h
> @@ -143,12 +143,14 @@ extern const uint8_t flow_proto[];
>   * @oaddr:	Our address (local address from passt's PoV)
>   * @eport:	Endpoint port
>   * @oport:	Our port
> + * @mac:	MAC address of remote endpoint
>   */
>  struct flowside {
>  	union inany_addr	oaddr;
>  	union inany_addr	eaddr;
>  	in_port_t		oport;
>  	in_port_t		eport;
> +	unsigned char 		mac[6];

We'll never have two MAC addresses that are not c->our_tap_mac in a
single flow, right? If that's the case, shouldn't we move this to
flow_common, so that we have just one instance, reflecting our usage?

>  };
>  
>  /**

-- 
Stefano


^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH v2 4/8] udp: forward external source mac address through tap interface
  2025-06-12  4:21 ` [PATCH v2 4/8] udp: forward external source mac address through tap interface Jon Maloy
@ 2025-06-12 15:17   ` Stefano Brivio
  0 siblings, 0 replies; 18+ messages in thread
From: Stefano Brivio @ 2025-06-12 15:17 UTC (permalink / raw)
  To: Jon Maloy; +Cc: dgibson, passt-dev

On Thu, 12 Jun 2025 00:21:48 -0400
Jon Maloy <jmaloy@redhat.com> wrote:

> We forward the incoming mac address through the tap interface when
> receiving incoming packets from network local hosts. Packets from
> the local host are excepted from this rule, and are still forwarded
> with the default passt/pasta mac address as source.
> 
> This is a part of the solution to bug #120
> 
> Signed-off-by: Jon Maloy <jmaloy@redhat.com>
> ---
>  udp.c | 29 +++++++++++++----------------
>  1 file changed, 13 insertions(+), 16 deletions(-)
> 
> diff --git a/udp.c b/udp.c
> index 65a52e0..ae8fbaf 100644
> --- a/udp.c
> +++ b/udp.c
> @@ -133,11 +133,8 @@ static int udp_splice_init[IP_VERSIONS][NUM_PORTS];
>  /* UDP header and data for inbound messages */
>  static struct udp_payload_t udp_payload[UDP_MAX_FRAMES];
>  
> -/* Ethernet header for IPv4 frames */
> -static struct ethhdr udp4_eth_hdr;
> -
> -/* Ethernet header for IPv6 frames */
> -static struct ethhdr udp6_eth_hdr;
> +/* Ethernet headers for IPv4 and IPv6 frames */
> +static struct ethhdr udp_eth_hdr[UDP_MAX_FRAMES];

An alternative could be to keep two separated sets of headers. It
avoids setting eh->h_proto every time. I'm not sure if it's worth it,
perhaps a quick throughput test would be a good idea, just to be sure
it's fine (I don't expect any substantial regression).

>  
>  /**
>   * struct udp_meta_t - Pre-cooked headers for UDP packets
> @@ -214,8 +211,10 @@ void udp_portmap_clear(void)
>   */
>  void udp_update_l2_buf(const unsigned char *eth_d, const unsigned char *eth_s)
>  {
> -	eth_update_mac(&udp4_eth_hdr, eth_d, eth_s);
> -	eth_update_mac(&udp6_eth_hdr, eth_d, eth_s);
> +	int i;
> +
> +	for (i = 0; i < UDP_MAX_FRAMES; i++)
> +		eth_update_mac(&udp_eth_hdr[i], eth_d, eth_s);
>  }
>  
>  /**
> @@ -238,6 +237,7 @@ static void udp_iov_init_one(const struct ctx *c, size_t i)
>  
>  	*siov = IOV_OF_LVALUE(payload->data);
>  
> +	tiov[UDP_IOV_ETH] = IOV_OF_LVALUE(udp_eth_hdr[i]);
>  	tiov[UDP_IOV_TAP] = tap_hdr_iov(c, &meta->taph);
>  	tiov[UDP_IOV_PAYLOAD].iov_base = payload;
>  
> @@ -253,9 +253,6 @@ static void udp_iov_init(const struct ctx *c)
>  {
>  	size_t i;
>  
> -	udp4_eth_hdr.h_proto = htons_constant(ETH_P_IP);
> -	udp6_eth_hdr.h_proto = htons_constant(ETH_P_IPV6);
> -
>  	for (i = 0; i < UDP_MAX_FRAMES; i++)
>  		udp_iov_init_one(c, i);
>  }
> @@ -362,21 +359,21 @@ static void udp_tap_prepare(const struct mmsghdr *mmh,
>  	struct iovec (*tap_iov)[UDP_NUM_IOVS] = &udp_l2_iov[idx];
>  	struct udp_payload_t *bp = &udp_payload[idx];
>  	struct udp_meta_t *bm = &udp_meta[idx];
> +	struct ethhdr *eh = (*tap_iov)[UDP_IOV_ETH].iov_base;

Nit: this could be moved after the declaration of tap_iov, so that we
keep initialisers from longest to shortest.

>  	size_t l4len;
>  
> +	eth_update_mac(eh, 0, toside->mac);
>  	if (!inany_v4(&toside->eaddr) || !inany_v4(&toside->oaddr)) {
>  		l4len = udp_update_hdr6(&bm->ip6h, bp, toside,
>  					mmh[idx].msg_len, no_udp_csum);
> -		tap_hdr_update(&bm->taph, l4len + sizeof(bm->ip6h) +
> -			       sizeof(udp6_eth_hdr));
> -		(*tap_iov)[UDP_IOV_ETH] = IOV_OF_LVALUE(udp6_eth_hdr);
> +		tap_hdr_update(&bm->taph, l4len + sizeof(bm->ip6h) + ETH_HLEN);
> +		eh->h_proto = htons_constant(ETH_P_IPV6);
>  		(*tap_iov)[UDP_IOV_IP] = IOV_OF_LVALUE(bm->ip6h);
>  	} else {
>  		l4len = udp_update_hdr4(&bm->ip4h, bp, toside,
>  					mmh[idx].msg_len, no_udp_csum);
> -		tap_hdr_update(&bm->taph, l4len + sizeof(bm->ip4h) +
> -			       sizeof(udp4_eth_hdr));
> -		(*tap_iov)[UDP_IOV_ETH] = IOV_OF_LVALUE(udp4_eth_hdr);
> +		tap_hdr_update(&bm->taph, l4len + sizeof(bm->ip4h) + ETH_HLEN);
> +		eh->h_proto = htons_constant(ETH_P_IP);
>  		(*tap_iov)[UDP_IOV_IP] = IOV_OF_LVALUE(bm->ip4h);
>  	}
>  	(*tap_iov)[UDP_IOV_PAYLOAD].iov_len = l4len;

-- 
Stefano


^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH v2 5/8] tcp: forward external source mac address through tap interface
  2025-06-12  4:21 ` [PATCH v2 5/8] tcp: " Jon Maloy
@ 2025-06-12 15:17   ` Stefano Brivio
  0 siblings, 0 replies; 18+ messages in thread
From: Stefano Brivio @ 2025-06-12 15:17 UTC (permalink / raw)
  To: Jon Maloy; +Cc: dgibson, passt-dev

On Thu, 12 Jun 2025 00:21:49 -0400
Jon Maloy <jmaloy@redhat.com> wrote:

> We forward the incoming mac address through the tap interface when
> receiving incoming packets from network local hosts. Packets from
> the local host are excepted from this rule, and are still forwarded
> with the default passt/pasta mac address as source.
> 
> This is a part of the solution to bug #120
> 
> Signed-off-by: Jon Maloy <jmaloy@redhat.com>
> ---
>  tcp.c          |  5 ++++-
>  tcp_buf.c      | 27 +++++++++++++--------------
>  tcp_internal.h |  2 +-
>  tcp_vu.c       |  5 ++---
>  4 files changed, 20 insertions(+), 19 deletions(-)
> 
> diff --git a/tcp.c b/tcp.c
> index f43c1e2..777f345 100644
> --- a/tcp.c
> +++ b/tcp.c
> @@ -920,7 +920,7 @@ static void tcp_fill_header(struct tcphdr *th,
>   * @no_tcp_csum:	Do not set TCP checksum
>   */
>  void tcp_fill_headers(const struct tcp_tap_conn *conn,
> -		      struct tap_hdr *taph,
> +		      struct tap_hdr *taph, struct ethhdr *eh,
>  		      struct iphdr *ip4h, struct ipv6hdr *ip6h,
>  		      struct tcphdr *th, struct iov_tail *payload,
>  		      const uint16_t *ip4_check, uint32_t seq, bool no_tcp_csum)
> @@ -952,6 +952,7 @@ void tcp_fill_headers(const struct tcp_tap_conn *conn,
>  			psum = proto_ipv4_header_psum(l4len, IPPROTO_TCP,
>  						      *src4, *dst4);
>  		}
> +		eh->h_proto = htons_constant(ETH_P_IP);
>  	}
>  
>  	if (ip6h) {
> @@ -972,7 +973,9 @@ void tcp_fill_headers(const struct tcp_tap_conn *conn,
>  						      &ip6h->saddr,
>  						      &ip6h->daddr);
>  		}
> +		eh->h_proto = htons_constant(ETH_P_IPV6);
>  	}
> +	eth_update_mac(eh, 0, tapside->mac);
>  
>  	tcp_fill_header(th, conn, seq);
>  
> diff --git a/tcp_buf.c b/tcp_buf.c
> index d1fca67..2dbeca2 100644
> --- a/tcp_buf.c
> +++ b/tcp_buf.c
> @@ -40,8 +40,7 @@
>  /* Static buffers */
>  
>  /* Ethernet header for IPv4 and IPv6 frames */
> -static struct ethhdr		tcp4_eth_src;
> -static struct ethhdr		tcp6_eth_src;
> +static struct ethhdr		tcp_eth_hdr[TCP_FRAMES_MEM];

Same comment as for the UDP implementation.

>  
>  static struct tap_hdr		tcp_payload_tap_hdr[TCP_FRAMES_MEM];
>  
> @@ -71,8 +70,10 @@ static struct iovec	tcp_l2_iov[TCP_FRAMES_MEM][TCP_NUM_IOVS];
>   */
>  void tcp_update_l2_buf(const unsigned char *eth_d, const unsigned char *eth_s)
>  {
> -	eth_update_mac(&tcp4_eth_src, eth_d, eth_s);
> -	eth_update_mac(&tcp6_eth_src, eth_d, eth_s);
> +	int i;
> +
> +	for (i = 0; i < TCP_FRAMES_MEM; i ++)

While 'i ++' is surely valid, it's somewhat weird and we never add that
extra whitespace elsewhere.

> +		eth_update_mac(&tcp_eth_hdr[i], eth_d, eth_s);
>  }
>  
>  /**
> @@ -85,8 +86,8 @@ void tcp_sock_iov_init(const struct ctx *c)
>  	struct iphdr iph = L2_BUF_IP4_INIT(IPPROTO_TCP);
>  	int i;
>  
> -	tcp6_eth_src.h_proto = htons_constant(ETH_P_IPV6);
> -	tcp4_eth_src.h_proto = htons_constant(ETH_P_IP);
> +	for (i = 0; i < TCP_FRAMES_MEM; i ++)

Same here.

> +		eth_update_mac(&tcp_eth_hdr[i], NULL, c->our_tap_mac);
>  
>  	for (i = 0; i < ARRAY_SIZE(tcp_payload); i++) {
>  		tcp6_payload_ip[i] = ip6;
> @@ -164,6 +165,7 @@ static void tcp_l2_buf_fill_headers(const struct tcp_tap_conn *conn,
>  	struct tap_hdr *taph = iov[TCP_IOV_TAP].iov_base;
>  	const struct flowside *tapside = TAPFLOW(conn);
>  	const struct in_addr *a4 = inany_v4(&tapside->oaddr);
> +	struct ethhdr *eh = iov[TCP_IOV_ETH].iov_base;
>  	struct ipv6hdr *ip6h = NULL;
>  	struct iphdr *ip4h = NULL;
>  
> @@ -172,7 +174,7 @@ static void tcp_l2_buf_fill_headers(const struct tcp_tap_conn *conn,
>  	else
>  		ip6h = iov[TCP_IOV_IP].iov_base;
>  
> -	tcp_fill_headers(conn, taph, ip4h, ip6h, th, &tail,
> +	tcp_fill_headers(conn, taph, eh, ip4h, ip6h, th, &tail,
>  			 check, seq, no_tcp_csum);
>  }
>  
> @@ -194,14 +196,12 @@ int tcp_buf_send_flag(const struct ctx *c, struct tcp_tap_conn *conn, int flags)
>  	int ret;
>  
>  	iov = tcp_l2_iov[tcp_payload_used];
> -	if (CONN_V4(conn)) {
> +	if (CONN_V4(conn))
>  		iov[TCP_IOV_IP] = IOV_OF_LVALUE(tcp4_payload_ip[tcp_payload_used]);
> -		iov[TCP_IOV_ETH].iov_base = &tcp4_eth_src;
> -	} else {
> +	else
>  		iov[TCP_IOV_IP] = IOV_OF_LVALUE(tcp6_payload_ip[tcp_payload_used]);
> -		iov[TCP_IOV_ETH].iov_base = &tcp6_eth_src;
> -	}
>  
> +	iov[TCP_IOV_ETH] = IOV_OF_LVALUE(tcp_eth_hdr[tcp_payload_used]);
>  	payload = iov[TCP_IOV_PAYLOAD].iov_base;
>  	seq = conn->seq_to_tap;
>  	ret = tcp_prepare_flags(c, conn, flags, &payload->th,
> @@ -259,11 +259,10 @@ static void tcp_data_to_tap(const struct ctx *c, struct tcp_tap_conn *conn,
>  			check = &iph->check;
>  		}
>  		iov[TCP_IOV_IP] = IOV_OF_LVALUE(tcp4_payload_ip[tcp_payload_used]);
> -		iov[TCP_IOV_ETH].iov_base = &tcp4_eth_src;
>  	} else if (CONN_V6(conn)) {
>  		iov[TCP_IOV_IP] = IOV_OF_LVALUE(tcp6_payload_ip[tcp_payload_used]);
> -		iov[TCP_IOV_ETH].iov_base = &tcp6_eth_src;
>  	}
> +	iov[TCP_IOV_ETH].iov_base = &tcp_eth_hdr[tcp_payload_used];
>  	payload = iov[TCP_IOV_PAYLOAD].iov_base;
>  	payload->th.th_off = sizeof(struct tcphdr) / 4;
>  	payload->th.th_x2 = 0;
> diff --git a/tcp_internal.h b/tcp_internal.h
> index 36c6533..6c2d1ef 100644
> --- a/tcp_internal.h
> +++ b/tcp_internal.h
> @@ -167,7 +167,7 @@ void tcp_rst_do(const struct ctx *c, struct tcp_tap_conn *conn);
>  struct tcp_info_linux;
>  
>  void tcp_fill_headers(const struct tcp_tap_conn *conn,
> -		      struct tap_hdr *taph,
> +		      struct tap_hdr *taph, struct ethhdr *eh,
>  		      struct iphdr *ip4h, struct ipv6hdr *ip6h,
>  		      struct tcphdr *th, struct iov_tail *payload,
>  		      const uint16_t *ip4_check, uint32_t seq, bool no_tcp_csum);
> diff --git a/tcp_vu.c b/tcp_vu.c
> index f3914c7..da1fb37 100644
> --- a/tcp_vu.c
> +++ b/tcp_vu.c
> @@ -135,7 +135,7 @@ int tcp_vu_send_flag(const struct ctx *c, struct tcp_tap_conn *conn, int flags)
>  	flags_elem[0].in_sg[0].iov_len = hdrlen + optlen;
>  	payload = IOV_TAIL(flags_elem[0].in_sg, 1, hdrlen);
>  
> -	tcp_fill_headers(conn, NULL, ip4h, ip6h, th, &payload,
> +	tcp_fill_headers(conn, NULL, eh, ip4h, ip6h, th, &payload,
>  			 NULL, seq, !*c->pcap);
>  
>  	if (*c->pcap) {
> @@ -315,7 +315,6 @@ static void tcp_vu_prepare(const struct ctx *c, struct tcp_tap_conn *conn,
>  	eh = vu_eth(base);
>  
>  	memcpy(eh->h_dest, c->guest_mac, sizeof(eh->h_dest));
> -	memcpy(eh->h_source, c->our_tap_mac, sizeof(eh->h_source));
>  
>  	/* initialize header */
>  
> @@ -339,7 +338,7 @@ static void tcp_vu_prepare(const struct ctx *c, struct tcp_tap_conn *conn,
>  	th->ack = 1;
>  	th->psh = push;
>  
> -	tcp_fill_headers(conn, NULL, ip4h, ip6h, th, &payload,
> +	tcp_fill_headers(conn, NULL, eh, ip4h, ip6h, th, &payload,
>  			 *check, conn->seq_to_tap, no_tcp_csum);
>  	if (ip4h)
>  		*check = &ip4h->check;

-- 
Stefano


^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH v2 6/8] tap: change signature of function tap_push_l2h()
  2025-06-12  4:21 ` [PATCH v2 6/8] tap: change signature of function tap_push_l2h() Jon Maloy
@ 2025-06-12 15:17   ` Stefano Brivio
  0 siblings, 0 replies; 18+ messages in thread
From: Stefano Brivio @ 2025-06-12 15:17 UTC (permalink / raw)
  To: Jon Maloy; +Cc: dgibson, passt-dev

On Thu, 12 Jun 2025 00:21:50 -0400
Jon Maloy <jmaloy@redhat.com> wrote:

> In the following commits it must be possible for the callers of
> funtion tap_push_l2h() to specify which source mac address should
> be added to the ethernet header sent over the tap interface. As
> a preparation, we now add a new argument to that function, still
> without actually using it.
> 
> Signed-off-by: Jon Maloy <jmaloy@redhat.com>
> ---
>  tap.c | 18 +++++++++++-------
>  tap.h |  3 ++-
>  tcp.c |  4 ++--
>  3 files changed, 15 insertions(+), 10 deletions(-)
> 
> diff --git a/tap.c b/tap.c
> index 6db5d88..634c012 100644
> --- a/tap.c
> +++ b/tap.c
> @@ -171,17 +171,21 @@ const struct in6_addr *tap_ip6_daddr(const struct ctx *c,
>   * tap_push_l2h() - Build an L2 header for an inbound packet
>   * @c:		Execution context
>   * @buf:	Buffer address at which to generate header
> + * @src_mac:	MAC address to be used as source for message

Maybe ", NULL means default"?

>   * @proto:	Ethernet protocol number for L3
>   *
>   * Return: pointer at which to write the packet's payload
>   */
> -void *tap_push_l2h(const struct ctx *c, void *buf, uint16_t proto)
> +void *tap_push_l2h(const struct ctx *c, void *buf,
> +		   const void *src_mac, uint16_t proto)
>  {
>  	struct ethhdr *eh = (struct ethhdr *)buf;
>  
> -	/* TODO: ARP table lookup */
>  	memcpy(eh->h_dest, c->guest_mac, ETH_ALEN);
> -	memcpy(eh->h_source, c->our_tap_mac, ETH_ALEN);
> +	if (src_mac)
> +		memcpy(eh->h_source, src_mac, ETH_ALEN);
> +	else
> +		memcpy(eh->h_source, c->our_tap_mac, ETH_ALEN);
>  	eh->h_proto = ntohs(proto);
>  	return eh + 1;
>  }
> @@ -261,7 +265,7 @@ void tap_udp4_send(const struct ctx *c, struct in_addr src, in_port_t sport,
>  {
>  	size_t l4len = dlen + sizeof(struct udphdr);
>  	char buf[USHRT_MAX];
> -	struct iphdr *ip4h = tap_push_l2h(c, buf, ETH_P_IP);
> +	struct iphdr *ip4h = tap_push_l2h(c, buf, NULL, ETH_P_IP);
>  	struct udphdr *uh = tap_push_ip4h(ip4h, src, dst, l4len, IPPROTO_UDP);
>  	char *data = tap_push_uh4(uh, src, sport, dst, dport, in, dlen);
>  
> @@ -281,7 +285,7 @@ void tap_icmp4_send(const struct ctx *c, struct in_addr src, struct in_addr dst,
>  		    const void *in, size_t l4len)
>  {
>  	char buf[USHRT_MAX];
> -	struct iphdr *ip4h = tap_push_l2h(c, buf, ETH_P_IP);
> +	struct iphdr *ip4h = tap_push_l2h(c, buf, NULL, ETH_P_IP);
>  	struct icmphdr *icmp4h = tap_push_ip4h(ip4h, src, dst,
>  					       l4len, IPPROTO_ICMP);
>  
> @@ -367,7 +371,7 @@ void tap_udp6_send(const struct ctx *c,
>  {
>  	size_t l4len = dlen + sizeof(struct udphdr);
>  	char buf[USHRT_MAX];
> -	struct ipv6hdr *ip6h = tap_push_l2h(c, buf, ETH_P_IPV6);
> +	struct ipv6hdr *ip6h = tap_push_l2h(c, buf, NULL, ETH_P_IPV6);
>  	struct udphdr *uh = tap_push_ip6h(ip6h, src, dst,
>  					  l4len, IPPROTO_UDP, flow);
>  	char *data = tap_push_uh6(uh, src, sport, dst, dport, in, dlen);
> @@ -389,7 +393,7 @@ void tap_icmp6_send(const struct ctx *c,
>  		    const void *in, size_t l4len)
>  {
>  	char buf[USHRT_MAX];
> -	struct ipv6hdr *ip6h = tap_push_l2h(c, buf, ETH_P_IPV6);
> +	struct ipv6hdr *ip6h = tap_push_l2h(c, buf, NULL, ETH_P_IPV6);
>  	struct icmp6hdr *icmp6h = tap_push_ip6h(ip6h, src, dst, l4len,
>  						IPPROTO_ICMPV6, 0);
>  
> diff --git a/tap.h b/tap.h
> index 6fe3d15..e640d95 100644
> --- a/tap.h
> +++ b/tap.h
> @@ -70,7 +70,8 @@ static inline void tap_hdr_update(struct tap_hdr *thdr, size_t l2len)
>  }
>  
>  unsigned long tap_l2_max_len(const struct ctx *c);
> -void *tap_push_l2h(const struct ctx *c, void *buf, uint16_t proto);
> +void *tap_push_l2h(const struct ctx *c, void *buf,
> +		   const void *src_mac, uint16_t proto);
>  void *tap_push_ip4h(struct iphdr *ip4h, struct in_addr src,
>  		     struct in_addr dst, size_t l4len, uint8_t proto);
>  void *tap_push_uh4(struct udphdr *uh, struct in_addr src, in_port_t sport,
> diff --git a/tcp.c b/tcp.c
> index 777f345..1a32424 100644
> --- a/tcp.c
> +++ b/tcp.c
> @@ -1898,7 +1898,7 @@ static void tcp_rst_no_conn(const struct ctx *c, int af,
>  		return;
>  
>  	if (af == AF_INET) {
> -		struct iphdr *ip4h = tap_push_l2h(c, buf, ETH_P_IP);
> +		struct iphdr *ip4h = tap_push_l2h(c, buf, NULL, ETH_P_IP);
>  		const struct in_addr *rst_src = daddr;
>  		const struct in_addr *rst_dst = saddr;
>  
> @@ -1908,7 +1908,7 @@ static void tcp_rst_no_conn(const struct ctx *c, int af,
>  					      *rst_src, *rst_dst);
>  
>  	} else {
> -		struct ipv6hdr *ip6h = tap_push_l2h(c, buf, ETH_P_IPV6);
> +		struct ipv6hdr *ip6h = tap_push_l2h(c, buf, NULL, ETH_P_IPV6);
>  		const struct in6_addr *rst_src = daddr;
>  		const struct in6_addr *rst_dst = saddr;
>  

-- 
Stefano


^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH v2 7/8] tcp: make tcp_rst_no_conn() respond with correct mac address
  2025-06-12  4:21 ` [PATCH v2 7/8] tcp: make tcp_rst_no_conn() respond with correct mac address Jon Maloy
@ 2025-06-12 15:17   ` Stefano Brivio
  0 siblings, 0 replies; 18+ messages in thread
From: Stefano Brivio @ 2025-06-12 15:17 UTC (permalink / raw)
  To: Jon Maloy; +Cc: dgibson, passt-dev

On Thu, 12 Jun 2025 00:21:51 -0400
Jon Maloy <jmaloy@redhat.com> wrote:

> tcp_rst_no_conn() needs to identify and specify which source mac
> address to use when sending an RST to the guest. This is because
> it doesn't have access to any flow structure where this address
> could be fetched.
> 
> Signed-off-by: Jon Maloy <jmaloy@redhat.com>
> ---
>  tcp.c | 15 +++++++++++++--
>  1 file changed, 13 insertions(+), 2 deletions(-)
> 
> diff --git a/tcp.c b/tcp.c
> index 1a32424..b49f603 100644
> --- a/tcp.c
> +++ b/tcp.c
> @@ -309,6 +309,7 @@
>  #include "tcp_internal.h"
>  #include "tcp_buf.h"
>  #include "tcp_vu.h"
> +#include "netlink.h"
>  
>  #ifndef __USE_MISC
>  /* From Linux UAPI, missing in netinet/tcp.h provided by musl */
> @@ -1888,6 +1889,9 @@ static void tcp_rst_no_conn(const struct ctx *c, int af,
>  			    const struct tcphdr *th, size_t l4len)
>  {
>  	struct iov_tail payload = IOV_TAIL(NULL, 0, 0);
> +	unsigned char src_mac[ETH_ALEN];
> +	union inany_addr translated;
> +	union inany_addr dst;

Same comment as previous patches, here, and...

>  	struct tcphdr *rsth;
>  	char buf[USHRT_MAX];
>  	uint32_t psum = 0;
> @@ -1897,8 +1901,15 @@ static void tcp_rst_no_conn(const struct ctx *c, int af,
>  	if (th->rst)
>  		return;
>  
> +	/* If remote host on local network - respond with its mac address */
> +	memcpy(src_mac, c->our_tap_mac, ETH_ALEN);
> +	inany_from_af(&dst, af, daddr);
> +	nat_outbound(c, &dst, &translated);
> +	if (!memcmp(&dst, &translated, sizeof(dst)))
> +		nl_mac_get(nl_sock, &dst, src_mac);

here. The rest of the series looks good to me!

-- 
Stefano


^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH v2 1/8] netlink: Add function to extract mac addresses from arp table
  2025-06-12 15:17   ` Stefano Brivio
@ 2025-06-13  6:11     ` David Gibson
  0 siblings, 0 replies; 18+ messages in thread
From: David Gibson @ 2025-06-13  6:11 UTC (permalink / raw)
  To: Stefano Brivio; +Cc: Jon Maloy, dgibson, passt-dev

[-- Attachment #1: Type: text/plain, Size: 1247 bytes --]

On Thu, Jun 12, 2025 at 05:17:05PM +0200, Stefano Brivio wrote:
> On Thu, 12 Jun 2025 00:21:45 -0400
> Jon Maloy <jmaloy@redhat.com> wrote:
> 
> > The solution to bug #120 requires the ability to translate from
> > an IP address to its corresponding MAC address in cases where
> > those are present in the ARP/NTP table.
> 
> Nit: NDP.
> 
> > We add this feature here.
> > 
> > Signed-off-by: Jon Maloy <jmaloy@redhat.com>
> 
> Nit: David's address is david@gibson.dropbear.id.au (in case you need
> to Cc: him specifically).

Yes, thanks.

[snip]
> > +	if (IN6_IS_ADDR_V4MAPPED(&addr->a6)) {

inany_v4() exists for exactly this sort of case.  It will both test if
the address is a v4 address, and extract it if so, so you shouldn't
need to poke into the innards of union inany_addr.

> > +		ip = &addr->v4mapped.a4;
> > +		alen = sizeof(struct in_addr);
> > +		req.ndm.ndm_family = AF_INET;
> > +	} else {
> > +		ip = &addr->a6;
> > +		alen = sizeof(struct in6_addr);
> > +		req.ndm.ndm_family = AF_INET6;
> > +	}

-- 
David Gibson (he or they)	| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you, not the other way
				| around.
http://www.ozlabs.org/~dgibson

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH v2 3/8] flow: add mac address of LAN local remote hosts to flow
  2025-06-12 15:17   ` Stefano Brivio
@ 2025-06-14 13:22     ` David Gibson
  0 siblings, 0 replies; 18+ messages in thread
From: David Gibson @ 2025-06-14 13:22 UTC (permalink / raw)
  To: Stefano Brivio; +Cc: Jon Maloy, dgibson, passt-dev

[-- Attachment #1: Type: text/plain, Size: 6245 bytes --]

On Thu, Jun 12, 2025 at 05:17:29PM +0200, Stefano Brivio wrote:
> On Thu, 12 Jun 2025 00:21:47 -0400
> Jon Maloy <jmaloy@redhat.com> wrote:
> 
> > When communicating with remote hosts on the local network, some guest
> > applications want to see the real mac address of that host instead
> > of passt/pasta's own tap address. The flowside structure is a convenient
> > location for storing that address, so we do that in this commit.
> > 
> > Note that we don´t add usage of this address in this commit, - that
> > will come in later commits.
> > 
> > Signed-off-by: Jon Maloy <jmaloy@redhat.com>
> > ---
> >  flow.c | 13 ++++++++++++-
> >  flow.h |  2 ++
> >  2 files changed, 14 insertions(+), 1 deletion(-)
> > 
> > diff --git a/flow.c b/flow.c
> > index da5c813..fffc817 100644
> > --- a/flow.c
> > +++ b/flow.c
> > @@ -20,6 +20,7 @@
> >  #include "flow.h"
> >  #include "flow_table.h"
> >  #include "repair.h"
> > +#include "netlink.h"
> >  
> >  const char *flow_state_str[] = {
> >  	[FLOW_STATE_FREE]	= "FREE",
> > @@ -438,7 +439,7 @@ struct flowside *flow_target(const struct ctx *c, union flow *flow,
> >  {
> >  	char estr[INANY_ADDRSTRLEN], fstr[INANY_ADDRSTRLEN];
> >  	struct flow_common *f = &flow->f;
> > -	const struct flowside *ini = &f->side[INISIDE];
> > +	struct flowside *ini = &f->side[INISIDE];
> >  	struct flowside *tgt = &f->side[TGTSIDE];
> >  	uint8_t tgtpif = PIF_NONE;
> >  
> > @@ -446,10 +447,16 @@ struct flowside *flow_target(const struct ctx *c, union flow *flow,
> >  	ASSERT(f->type == FLOW_TYPE_NONE);
> >  	ASSERT(f->pif[INISIDE] != PIF_NONE && f->pif[TGTSIDE] == PIF_NONE);
> >  	ASSERT(flow->f.state == FLOW_STATE_INI);
> > +	memcpy(ini->mac, c->our_tap_mac, ETH_ALEN);
> > +	memcpy(tgt->mac, c->our_tap_mac, ETH_ALEN);
> >  
> >  	switch (f->pif[INISIDE]) {
> >  	case PIF_TAP:
> >  		tgtpif = fwd_nat_from_tap(c, proto, ini, tgt);
> > +
> > +		/* If remote host on local network - insert its mac address */
> > +		if (!memcmp(&tgt->eaddr, &ini->oaddr, sizeof(ini->oaddr)))
> > +			nl_mac_get(nl_sock, &ini->oaddr, ini->mac);
> >  		break;
> >  
> >  	case PIF_SPLICE:
> > @@ -458,6 +465,10 @@ struct flowside *flow_target(const struct ctx *c, union flow *flow,
> >  
> >  	case PIF_HOST:
> >  		tgtpif = fwd_nat_from_host(c, proto, ini, tgt);
> > +
> > +		/* If remote host on local network - insert its mac address */
> > +		if (!memcmp(&tgt->oaddr, &ini->eaddr, sizeof(ini->eaddr)))
> > +			nl_mac_get(nl_sock, &tgt->oaddr, tgt->mac);
> >  		break;
> >  
> >  	default:
> > diff --git a/flow.h b/flow.h
> > index cac618a..916951b 100644
> > --- a/flow.h
> > +++ b/flow.h
> > @@ -143,12 +143,14 @@ extern const uint8_t flow_proto[];
> >   * @oaddr:	Our address (local address from passt's PoV)
> >   * @eport:	Endpoint port
> >   * @oport:	Our port
> > + * @mac:	MAC address of remote endpoint
> >   */
> >  struct flowside {
> >  	union inany_addr	oaddr;
> >  	union inany_addr	eaddr;
> >  	in_port_t		oport;
> >  	in_port_t		eport;
> > +	unsigned char 		mac[6];
> 
> We'll never have two MAC addresses that are not c->our_tap_mac in a

So... there are several things to unpack here.

1)

This series introduces cases where the statement as written above will
not be true any more. Specifically it add cases where the (tap side)
MACs are:
	- a real MAC from the host's LAN
and	- the guest's MAC

Neither of these is our_tap_mac.

1a)

I'm guessing you meant c->guest_mac, rather than c->our_tap_mac above.
In that case it's true for the time being.  It would cease to be true
if we started supporting multiple bridged guests behind a single
passt/pasta instance (in fact c->guest_mac would have to cease to
exist to allow that).

2)

If this were to go into flowside, it should be "omac" instead of just
"mac".  It's specifically "our" MAC in the sense that it's for frames
going to/from from passt itself, rather than to/from the guest
(a.k.a. the endpoint).

3)

I don't think putting this in flowside makes sense for a different
reason: putting it in flowside implies we need to track it for both
the host side and the tap side.

There are not 2, but *4* MAC addresses involved in a flow - just as
there are 4 IP addresses.  The two MACs on the host side, and the two
MACs on the tap side.

I don't think the host side omac will ever be useful.  We can't
control it, and it can change without warning due to reconfiguration
on the host.  Even without reconfiguration, multiple host interfaces
could mean the host side omac is non-constant, and non-trivial to
determine for a specific packet/flow.  In fact if there's multipath
routing, it might not even be constant for a single flow.

Host side emac is what we're determining by looking at the host ARP
table.  But all we're doing with it is using it to initialise the
tap-side omac, so I don't think we need to track it separately.  It
could also change during the life of a flow.

If the host interface is point to point (e.g. wireguard) there will be
no host side MACs at all.  If the host interface is something other
than ethernet there might be MACs, but they could have a different
format.

Tap-side omac is the key new concept in this series: we're allowing it
to be set per-flow, rather than being a global constant (our_tap_mac).

Tap-side emac remains a global constant (guest_mac) even with this
series. As above, we'd need to change that to handle multiple bridged
guests on a single passt instance.

So, to accomplish what we need for this series, we only really need to
track tap-side omac.  I agree with Stefano that it makes more sense in
flow_common than flowside.  In fact, it's even more specific than
that, since it doesn't make sense for spliced flows (since they go via
guest loopback the guest side traffic has no MACs at all).  But we
don't currently have a location for pif-specific but not
protocol-specific flow information, so flow_common is the closest we
have.

-- 
David Gibson (he or they)	| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you, not the other way
				| around.
http://www.ozlabs.org/~dgibson

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply	[flat|nested] 18+ messages in thread

end of thread, other threads:[~2025-06-14 13:23 UTC | newest]

Thread overview: 18+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2025-06-12  4:21 [PATCH v2 0/8] use true mac address of LAN local remote hosts Jon Maloy
2025-06-12  4:21 ` [PATCH v2 1/8] netlink: Add function to extract mac addresses from arp table Jon Maloy
2025-06-12 15:17   ` Stefano Brivio
2025-06-13  6:11     ` David Gibson
2025-06-12  4:21 ` [PATCH v2 2/8] arp: respond with true mac address of LAN local remote hosts Jon Maloy
2025-06-12 15:17   ` Stefano Brivio
2025-06-12  4:21 ` [PATCH v2 3/8] flow: add mac address of LAN local remote hosts to flow Jon Maloy
2025-06-12 15:17   ` Stefano Brivio
2025-06-14 13:22     ` David Gibson
2025-06-12  4:21 ` [PATCH v2 4/8] udp: forward external source mac address through tap interface Jon Maloy
2025-06-12 15:17   ` Stefano Brivio
2025-06-12  4:21 ` [PATCH v2 5/8] tcp: " Jon Maloy
2025-06-12 15:17   ` Stefano Brivio
2025-06-12  4:21 ` [PATCH v2 6/8] tap: change signature of function tap_push_l2h() Jon Maloy
2025-06-12 15:17   ` Stefano Brivio
2025-06-12  4:21 ` [PATCH v2 7/8] tcp: make tcp_rst_no_conn() respond with correct mac address Jon Maloy
2025-06-12 15:17   ` Stefano Brivio
2025-06-12  4:21 ` [PATCH v2 8/8] icmp: let icmp use mac address from flowside structure Jon Maloy

Code repositories for project(s) associated with this public inbox

	https://passt.top/passt

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for IMAP folder(s).