public inbox for passt-dev@passt.top
 help / color / mirror / code / Atom feed
* [PATCH v5 0/4]  Reconstruct ICMP headers for failed UDP connect
@ 2025-02-23 22:59 Jon Maloy
  2025-02-23 22:59 ` [PATCH v5 1/4] tap: break out building of udp header from tap_udp4_send function Jon Maloy
                   ` (3 more replies)
  0 siblings, 4 replies; 8+ messages in thread
From: Jon Maloy @ 2025-02-23 22:59 UTC (permalink / raw)
  To: passt-dev, sbrivio, lvivier, dgibson, jmaloy

Reconstruct incoming ICMP headers for failed UDP connect and forward back
to local peer.

v2: - Added patch breaking out udp header creation from function
      tap_udp4_send().
    - Updated the ICMP creation by using the new function.
    - Added logics to find correct flow, depending on origin.
    - All done after feedback from David Gibson.
v3: - More changes after feedback from David Gibson.
v4: - Even more changes after feedback from D. Gibson
v5: - Added corresponding patches for IPv6

Jon Maloy (4):
  tap: break out building of udp header from tap_udp4_send function
  udp: create and send ICMPv4 to local peer when applicable
  tap: break out building of udp header from tap_udp6_send function
  udp: create and send ICMPv6 to local peer when applicable

 tap.c          |  84 ++++++++++++++++++++++++----------
 tap.h          |  15 +++++++
 udp.c          | 119 ++++++++++++++++++++++++++++++++++++++++++++-----
 udp_internal.h |   2 +-
 udp_vu.c       |   4 +-
 5 files changed, 187 insertions(+), 37 deletions(-)

-- 
2.48.1


^ permalink raw reply	[flat|nested] 8+ messages in thread

* [PATCH v5 1/4] tap: break out building of udp header from tap_udp4_send function
  2025-02-23 22:59 [PATCH v5 0/4] Reconstruct ICMP headers for failed UDP connect Jon Maloy
@ 2025-02-23 22:59 ` Jon Maloy
  2025-02-23 22:59 ` [PATCH v5 2/4] udp: create and send ICMPv4 to local peer when applicable Jon Maloy
                   ` (2 subsequent siblings)
  3 siblings, 0 replies; 8+ messages in thread
From: Jon Maloy @ 2025-02-23 22:59 UTC (permalink / raw)
  To: passt-dev, sbrivio, lvivier, dgibson, jmaloy

We will need to build the UDP header at other locations than in function
tap_udp4_send(), so we break that part out to a separate function.

Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: Jon Maloy <jmaloy@redhat.com>
---
 tap.c | 32 +++++++++++++++++++++++++-------
 tap.h |  5 +++++
 2 files changed, 30 insertions(+), 7 deletions(-)

diff --git a/tap.c b/tap.c
index 44b0fc0..932eef6 100644
--- a/tap.c
+++ b/tap.c
@@ -163,7 +163,7 @@ static void *tap_push_ip4h(struct iphdr *ip4h, struct in_addr src,
 }
 
 /**
- * tap_udp4_send() - Send UDP over IPv4 packet
+ * tap_push_uh4() - Build UDPv4 header with checksum
  * @c:		Execution context
  * @src:	IPv4 source address
  * @sport:	UDP source port
@@ -172,15 +172,11 @@ static void *tap_push_ip4h(struct iphdr *ip4h, struct in_addr src,
  * @in:		UDP payload contents (not including UDP header)
  * @dlen:	UDP payload length (not including UDP header)
  */
-void tap_udp4_send(const struct ctx *c, struct in_addr src, in_port_t sport,
+void *tap_push_uh4(struct udphdr *uh, struct in_addr src, in_port_t sport,
 		   struct in_addr dst, in_port_t dport,
 		   const void *in, size_t dlen)
 {
 	size_t l4len = dlen + sizeof(struct udphdr);
-	char buf[USHRT_MAX];
-	struct iphdr *ip4h = tap_push_l2h(c, buf, ETH_P_IP);
-	struct udphdr *uh = tap_push_ip4h(ip4h, src, dst, l4len, IPPROTO_UDP);
-	char *data = (char *)(uh + 1);
 	const struct iovec iov = {
 		.iov_base = (void *)in,
 		.iov_len = dlen
@@ -191,8 +187,30 @@ void tap_udp4_send(const struct ctx *c, struct in_addr src, in_port_t sport,
 	uh->dest = htons(dport);
 	uh->len = htons(l4len);
 	csum_udp4(uh, src, dst, &payload);
-	memcpy(data, in, dlen);
+	return uh + 1;
+}
+
+/**
+ * tap_udp4_send() - Send UDP over IPv4 packet
+ * @c:		Execution context
+ * @src:	IPv4 source address
+ * @sport:	UDP source port
+ * @dst:	IPv4 destination address
+ * @dport:	UDP destination port
+ * @in:		UDP payload contents (not including UDP header)
+ * @dlen:	UDP payload length (not including UDP header)
+ */
+void tap_udp4_send(const struct ctx *c, struct in_addr src, in_port_t sport,
+		   struct in_addr dst, in_port_t dport,
+		   const void *in, size_t dlen)
+{
+	size_t l4len = dlen + sizeof(struct udphdr);
+	char buf[USHRT_MAX];
+	struct iphdr *ip4h = tap_push_l2h(c, buf, ETH_P_IP);
+	struct udphdr *uh = tap_push_ip4h(ip4h, src, dst, l4len, IPPROTO_UDP);
+	char *data = tap_push_uh4(uh, src, sport, dst, dport, in, dlen);
 
+	memcpy(data, in, dlen);
 	tap_send_single(c, buf, dlen + (data - buf));
 }
 
diff --git a/tap.h b/tap.h
index a476a12..0b3eb93 100644
--- a/tap.h
+++ b/tap.h
@@ -6,6 +6,8 @@
 #ifndef TAP_H
 #define TAP_H
 
+struct udphdr;
+
 /**
  * struct tap_hdr - tap backend specific headers
  * @vnet_len:	Frame length (for qemu socket transport)
@@ -42,6 +44,9 @@ static inline void tap_hdr_update(struct tap_hdr *thdr, size_t l2len)
 		thdr->vnet_len = htonl(l2len);
 }
 
+void *tap_push_uh4(struct udphdr *uh, struct in_addr src, in_port_t sport,
+		   struct in_addr dst, in_port_t dport,
+		   const void *in, size_t dlen);
 void tap_udp4_send(const struct ctx *c, struct in_addr src, in_port_t sport,
 		   struct in_addr dst, in_port_t dport,
 		   const void *in, size_t dlen);
-- 
@@ -6,6 +6,8 @@
 #ifndef TAP_H
 #define TAP_H
 
+struct udphdr;
+
 /**
  * struct tap_hdr - tap backend specific headers
  * @vnet_len:	Frame length (for qemu socket transport)
@@ -42,6 +44,9 @@ static inline void tap_hdr_update(struct tap_hdr *thdr, size_t l2len)
 		thdr->vnet_len = htonl(l2len);
 }
 
+void *tap_push_uh4(struct udphdr *uh, struct in_addr src, in_port_t sport,
+		   struct in_addr dst, in_port_t dport,
+		   const void *in, size_t dlen);
 void tap_udp4_send(const struct ctx *c, struct in_addr src, in_port_t sport,
 		   struct in_addr dst, in_port_t dport,
 		   const void *in, size_t dlen);
-- 
2.48.1


^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [PATCH v5 2/4] udp: create and send ICMPv4 to local peer when applicable
  2025-02-23 22:59 [PATCH v5 0/4] Reconstruct ICMP headers for failed UDP connect Jon Maloy
  2025-02-23 22:59 ` [PATCH v5 1/4] tap: break out building of udp header from tap_udp4_send function Jon Maloy
@ 2025-02-23 22:59 ` Jon Maloy
  2025-02-24  2:13   ` David Gibson
  2025-02-23 22:59 ` [PATCH v5 3/4] tap: break out building of udp header from tap_udp6_send function Jon Maloy
  2025-02-23 22:59 ` [PATCH v5 4/4] udp: create and send ICMPv6 to local peer when applicable Jon Maloy
  3 siblings, 1 reply; 8+ messages in thread
From: Jon Maloy @ 2025-02-23 22:59 UTC (permalink / raw)
  To: passt-dev, sbrivio, lvivier, dgibson, jmaloy

When a local peer sends a UDP message to a non-existing port on an
existing remote host, that host will return an ICMP message containing
the error code ICMP_PORT_UNREACH, plus the header and the first eight
bytes of the original message. If the sender socket has been connected,
it uses this message to issue a "Connection Refused" event to the user.

Until now, we have only read such events from the externally facing
socket, but we don't forward them back to the local sender because
we cannot read the ICMP message directly to user space. Because of
this, the local peer will hang and wait for a response that never
arrives.

We now fix this for IPv4 by recreating and forwarding a correct ICMP
message back to the internal sender. We synthesize the message based
on the information in the extended error structure, plus the returned
part of the original message body.

Note that for the sake of completeness, we even produce ICMP messages
for other error codes. We have noticed that at least ICMP_PROT_UNREACH
is propagated as an error event back to the user.

Signed-off-by: Jon Maloy <jmaloy@redhat.com>
---
v2: - Updated the ICMP creation to use the new function tap_push_uh4().
    - Added logics to find correct flow, depending on origin.
    - All done after feedback from David Gibson.
v3: - Passing parameter 'now' along with call to udp_sock_errs() call.
    - Corrected lookup of flow from listener socket
    - All done after feedback from David Gibson.
v4: - Eliminate any attempt to handle ICMPs from listener sockets.
    - After feedback from David Gibson.
v5: - Small reshuffle in anticipation of following commits.
---
 tap.c          |  4 +--
 tap.h          |  2 ++
 udp.c          | 72 +++++++++++++++++++++++++++++++++++++++++++-------
 udp_internal.h |  2 +-
 udp_vu.c       |  4 +--
 5 files changed, 69 insertions(+), 15 deletions(-)

diff --git a/tap.c b/tap.c
index 932eef6..3daba28 100644
--- a/tap.c
+++ b/tap.c
@@ -143,8 +143,8 @@ static void *tap_push_l2h(const struct ctx *c, void *buf, uint16_t proto)
  *
  * Return: pointer at which to write the packet's payload
  */
-static void *tap_push_ip4h(struct iphdr *ip4h, struct in_addr src,
-			   struct in_addr dst, size_t l4len, uint8_t proto)
+void *tap_push_ip4h(struct iphdr *ip4h, struct in_addr src,
+		    struct in_addr dst, size_t l4len, uint8_t proto)
 {
 	uint16_t l3len = l4len + sizeof(*ip4h);
 
diff --git a/tap.h b/tap.h
index 0b3eb93..37da21d 100644
--- a/tap.h
+++ b/tap.h
@@ -47,6 +47,8 @@ static inline void tap_hdr_update(struct tap_hdr *thdr, size_t l2len)
 void *tap_push_uh4(struct udphdr *uh, struct in_addr src, in_port_t sport,
 		   struct in_addr dst, in_port_t dport,
 		   const void *in, size_t dlen);
+void *tap_push_ip4h(struct iphdr *ip4h, struct in_addr src,
+		    struct in_addr dst, size_t l4len, uint8_t proto);
 void tap_udp4_send(const struct ctx *c, struct in_addr src, in_port_t sport,
 		   struct in_addr dst, in_port_t dport,
 		   const void *in, size_t dlen);
diff --git a/udp.c b/udp.c
index 923cc38..3dfd478 100644
--- a/udp.c
+++ b/udp.c
@@ -87,6 +87,7 @@
 #include <netinet/in.h>
 #include <netinet/ip.h>
 #include <netinet/udp.h>
+#include <netinet/ip_icmp.h>
 #include <stdint.h>
 #include <stddef.h>
 #include <string.h>
@@ -402,25 +403,69 @@ static void udp_tap_prepare(const struct mmsghdr *mmh,
 	(*tap_iov)[UDP_IOV_PAYLOAD].iov_len = l4len;
 }
 
+/**
+ * udp_send_conn_fail_icmp4() - Construct and send ICMP to local peer
+ * @c:	          Execution context
+ * @ee:	  Extended error descriptor
+ * @ref:	  epoll reference
+ * @in:	  First bytes (max 8) of original UDP message body
+ * @dlen:	  Length of the read part of original UDP message body
+ */
+static void udp_send_conn_fail_icmp4(const struct ctx *c,
+				     const struct sock_extended_err *ee,
+				     const struct flowside *toside,
+				     void *in, size_t dlen)
+{
+	struct in_addr oaddr = toside->oaddr.v4mapped.a4;
+	struct in_addr eaddr = toside->eaddr.v4mapped.a4;
+	in_port_t eport = toside->eport;
+	in_port_t oport = toside->oport;
+	struct {
+		struct icmphdr icmp4h;
+		struct iphdr ip4h;
+		struct udphdr uh;
+		char data[8];
+	} __attribute__((packed, aligned(__alignof__(max_align_t)))) msg;
+	size_t msglen = sizeof(msg) - sizeof(msg.data) + dlen;
+
+	memset(&msg, 0, sizeof(msg));
+	msg.icmp4h.type = ee->ee_type;
+	msg.icmp4h.code = ee->ee_code;
+
+	/* Reconstruct the original headers as returned in the ICMP message */
+	tap_push_ip4h(&msg.ip4h, eaddr, oaddr, dlen, IPPROTO_UDP);
+	tap_push_uh4(&msg.uh, eaddr, eport, oaddr, oport, in, dlen);
+	memcpy(&msg.data, in, dlen);
+
+	tap_icmp4_send(c, oaddr, eaddr, &msg, msglen);
+}
+
 /**
  * udp_sock_recverr() - Receive and clear an error from a socket
- * @s:		Socket to receive from
+ * @c:	          Execution context
+ * @ref:	  epoll reference
  *
  * Return: 1 if error received and processed, 0 if no more errors in queue, < 0
  *         if there was an error reading the queue
  *
  * #syscalls recvmsg
  */
-static int udp_sock_recverr(int s)
+static int udp_sock_recverr(const struct ctx *c, union epoll_ref ref)
 {
 	const struct sock_extended_err *ee;
 	const struct cmsghdr *hdr;
 	char buf[CMSG_SPACE(sizeof(*ee))];
+	char data[8];
+	int s = ref.fd;
+	struct iovec iov = {
+		.iov_base = data,
+		.iov_len = sizeof(data)
+	};
 	struct msghdr mh = {
 		.msg_name = NULL,
 		.msg_namelen = 0,
-		.msg_iov = NULL,
-		.msg_iovlen = 0,
+		.msg_iov = &iov,
+		.msg_iovlen = 1,
 		.msg_control = buf,
 		.msg_controllen = sizeof(buf),
 	};
@@ -450,8 +495,14 @@ static int udp_sock_recverr(int s)
 	}
 
 	ee = (const struct sock_extended_err *)CMSG_DATA(hdr);
+	if (ref.type == EPOLL_TYPE_UDP_REPLY) {
+		flow_sidx_t sidx = flow_sidx_opposite(ref.flowside);
+		const struct flowside *toside = flowside_at_sidx(sidx);
 
-	/* TODO: When possible propagate and otherwise handle errors */
+		udp_send_conn_fail_icmp4(c, ee, toside, data, rc);
+	} else {
+		trace("Ignoring received cmsg on listener socket");
+	}
 	debug("%s error on UDP socket %i: %s",
 	      str_ee_origin(ee), s, strerror_(ee->ee_errno));
 
@@ -461,15 +512,16 @@ static int udp_sock_recverr(int s)
 /**
  * udp_sock_errs() - Process errors on a socket
  * @c:		Execution context
- * @s:		Socket to receive from
+ * @ref:        epoll reference
  * @events:	epoll events bitmap
  *
  * Return: Number of errors handled, or < 0 if we have an unrecoverable error
  */
-int udp_sock_errs(const struct ctx *c, int s, uint32_t events)
+int udp_sock_errs(const struct ctx *c, union epoll_ref ref, uint32_t events)
 {
 	unsigned n_err = 0;
 	socklen_t errlen;
+	int s = ref.fd;
 	int rc, err;
 
 	ASSERT(!c->no_udp);
@@ -478,7 +530,7 @@ int udp_sock_errs(const struct ctx *c, int s, uint32_t events)
 		return 0; /* Nothing to do */
 
 	/* Empty the error queue */
-	while ((rc = udp_sock_recverr(s)) > 0)
+	while ((rc = udp_sock_recverr(c, ref)) > 0)
 		n_err += rc;
 
 	if (rc < 0)
@@ -558,7 +610,7 @@ static void udp_buf_listen_sock_handler(const struct ctx *c,
 	const socklen_t sasize = sizeof(udp_meta[0].s_in);
 	int n, i;
 
-	if (udp_sock_errs(c, ref.fd, events) < 0) {
+	if (udp_sock_errs(c, ref, events) < 0) {
 		err("UDP: Unrecoverable error on listening socket:"
 		    " (%s port %hu)", pif_name(ref.udp.pif), ref.udp.port);
 		/* FIXME: what now?  close/re-open socket? */
@@ -661,7 +713,7 @@ static void udp_buf_reply_sock_handler(const struct ctx *c, union epoll_ref ref,
 
 	from_s = uflow->s[ref.flowside.sidei];
 
-	if (udp_sock_errs(c, from_s, events) < 0) {
+	if (udp_sock_errs(c, ref, events) < 0) {
 		flow_err(uflow, "Unrecoverable error on reply socket");
 		flow_err_details(uflow);
 		udp_flow_close(c, uflow);
diff --git a/udp_internal.h b/udp_internal.h
index cc80e30..3b081f5 100644
--- a/udp_internal.h
+++ b/udp_internal.h
@@ -30,5 +30,5 @@ size_t udp_update_hdr4(struct iphdr *ip4h, struct udp_payload_t *bp,
 size_t udp_update_hdr6(struct ipv6hdr *ip6h, struct udp_payload_t *bp,
                        const struct flowside *toside, size_t dlen,
 		       bool no_udp_csum);
-int udp_sock_errs(const struct ctx *c, int s, uint32_t events);
+int udp_sock_errs(const struct ctx *c, union epoll_ref ref, uint32_t events);
 #endif /* UDP_INTERNAL_H */
diff --git a/udp_vu.c b/udp_vu.c
index 4123510..c26a223 100644
--- a/udp_vu.c
+++ b/udp_vu.c
@@ -227,7 +227,7 @@ void udp_vu_listen_sock_handler(const struct ctx *c, union epoll_ref ref,
 	struct vu_virtq *vq = &vdev->vq[VHOST_USER_RX_QUEUE];
 	int i;
 
-	if (udp_sock_errs(c, ref.fd, events) < 0) {
+	if (udp_sock_errs(c, ref, events) < 0) {
 		err("UDP: Unrecoverable error on listening socket:"
 		    " (%s port %hu)", pif_name(ref.udp.pif), ref.udp.port);
 		return;
@@ -302,7 +302,7 @@ void udp_vu_reply_sock_handler(const struct ctx *c, union epoll_ref ref,
 
 	ASSERT(!c->no_udp);
 
-	if (udp_sock_errs(c, from_s, events) < 0) {
+	if (udp_sock_errs(c, ref, events) < 0) {
 		flow_err(uflow, "Unrecoverable error on reply socket");
 		flow_err_details(uflow);
 		udp_flow_close(c, uflow);
-- 
@@ -227,7 +227,7 @@ void udp_vu_listen_sock_handler(const struct ctx *c, union epoll_ref ref,
 	struct vu_virtq *vq = &vdev->vq[VHOST_USER_RX_QUEUE];
 	int i;
 
-	if (udp_sock_errs(c, ref.fd, events) < 0) {
+	if (udp_sock_errs(c, ref, events) < 0) {
 		err("UDP: Unrecoverable error on listening socket:"
 		    " (%s port %hu)", pif_name(ref.udp.pif), ref.udp.port);
 		return;
@@ -302,7 +302,7 @@ void udp_vu_reply_sock_handler(const struct ctx *c, union epoll_ref ref,
 
 	ASSERT(!c->no_udp);
 
-	if (udp_sock_errs(c, from_s, events) < 0) {
+	if (udp_sock_errs(c, ref, events) < 0) {
 		flow_err(uflow, "Unrecoverable error on reply socket");
 		flow_err_details(uflow);
 		udp_flow_close(c, uflow);
-- 
2.48.1


^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [PATCH v5 3/4] tap: break out building of udp header from tap_udp6_send function
  2025-02-23 22:59 [PATCH v5 0/4] Reconstruct ICMP headers for failed UDP connect Jon Maloy
  2025-02-23 22:59 ` [PATCH v5 1/4] tap: break out building of udp header from tap_udp4_send function Jon Maloy
  2025-02-23 22:59 ` [PATCH v5 2/4] udp: create and send ICMPv4 to local peer when applicable Jon Maloy
@ 2025-02-23 22:59 ` Jon Maloy
  2025-02-24  2:16   ` David Gibson
  2025-02-23 22:59 ` [PATCH v5 4/4] udp: create and send ICMPv6 to local peer when applicable Jon Maloy
  3 siblings, 1 reply; 8+ messages in thread
From: Jon Maloy @ 2025-02-23 22:59 UTC (permalink / raw)
  To: passt-dev, sbrivio, lvivier, dgibson, jmaloy

We will need to build the UDP header at other locations than in function
tap_udp6_send(), so we break that part out to a separate function.

Signed-off-by: Jon Maloy <jmaloy@redhat.com>
---
 tap.c | 40 ++++++++++++++++++++++++++++++----------
 tap.h |  4 ++++
 2 files changed, 34 insertions(+), 10 deletions(-)

diff --git a/tap.c b/tap.c
index 3daba28..380a7b9 100644
--- a/tap.c
+++ b/tap.c
@@ -266,7 +266,7 @@ static void *tap_push_ip6h(struct ipv6hdr *ip6h,
 }
 
 /**
- * tap_udp6_send() - Send UDP over IPv6 packet
+ * tap_push_uh6() - Build UDPv6 header with checksum
  * @c:		Execution context
  * @src:	IPv6 source address
  * @sport:	UDP source port
@@ -276,19 +276,14 @@ static void *tap_push_ip6h(struct ipv6hdr *ip6h,
  * @in:		UDP payload contents (not including UDP header)
  * @dlen:	UDP payload length (not including UDP header)
  */
-void tap_udp6_send(const struct ctx *c,
+void *tap_push_uh6(struct udphdr *uh,
 		   const struct in6_addr *src, in_port_t sport,
 		   const struct in6_addr *dst, in_port_t dport,
-		   uint32_t flow, void *in, size_t dlen)
+		   void *in, size_t dlen)
 {
 	size_t l4len = dlen + sizeof(struct udphdr);
-	char buf[USHRT_MAX];
-	struct ipv6hdr *ip6h = tap_push_l2h(c, buf, ETH_P_IPV6);
-	struct udphdr *uh = tap_push_ip6h(ip6h, src, dst,
-					  l4len, IPPROTO_UDP, flow);
-	char *data = (char *)(uh + 1);
 	const struct iovec iov = {
-		.iov_base = in,
+		.iov_base = (void *)in,
 		.iov_len = dlen
 	};
 	struct iov_tail payload = IOV_TAIL(&iov, 1, 0);
@@ -297,8 +292,33 @@ void tap_udp6_send(const struct ctx *c,
 	uh->dest = htons(dport);
 	uh->len = htons(l4len);
 	csum_udp6(uh, src, dst, &payload);
-	memcpy(data, in, dlen);
+	return uh + 1;
+}
+
+/**
+ * tap_udp6_send() - Send UDP over IPv6 packet
+ * @c:		Execution context
+ * @src:	IPv6 source address
+ * @sport:	UDP source port
+ * @dst:	IPv6 destination address
+ * @dport:	UDP destination port
+ * @flow:	Flow label
+ * @in:		UDP payload contents (not including UDP header)
+ * @dlen:	UDP payload length (not including UDP header)
+ */
+void tap_udp6_send(const struct ctx *c,
+		   const struct in6_addr *src, in_port_t sport,
+		   const struct in6_addr *dst, in_port_t dport,
+		   uint32_t flow, void *in, size_t dlen)
+{
+	size_t l4len = dlen + sizeof(struct udphdr);
+	char buf[USHRT_MAX];
+	struct ipv6hdr *ip6h = tap_push_l2h(c, buf, ETH_P_IPV6);
+	struct udphdr *uh = tap_push_ip6h(ip6h, src, dst,
+					  l4len, IPPROTO_UDP, flow);
+	char *data = tap_push_uh6(uh, src, sport, dst, dport, in, dlen);
 
+	memcpy(data, in, dlen);
 	tap_send_single(c, buf, dlen + (data - buf));
 }
 
diff --git a/tap.h b/tap.h
index 37da21d..b7b8cef 100644
--- a/tap.h
+++ b/tap.h
@@ -47,6 +47,10 @@ static inline void tap_hdr_update(struct tap_hdr *thdr, size_t l2len)
 void *tap_push_uh4(struct udphdr *uh, struct in_addr src, in_port_t sport,
 		   struct in_addr dst, in_port_t dport,
 		   const void *in, size_t dlen);
+void *tap_push_uh6(struct udphdr *uh,
+		   const struct in6_addr *src, in_port_t sport,
+		   const struct in6_addr *dst, in_port_t dport,
+		   void *in, size_t dlen);
 void *tap_push_ip4h(struct iphdr *ip4h, struct in_addr src,
 		    struct in_addr dst, size_t l4len, uint8_t proto);
 void tap_udp4_send(const struct ctx *c, struct in_addr src, in_port_t sport,
-- 
@@ -47,6 +47,10 @@ static inline void tap_hdr_update(struct tap_hdr *thdr, size_t l2len)
 void *tap_push_uh4(struct udphdr *uh, struct in_addr src, in_port_t sport,
 		   struct in_addr dst, in_port_t dport,
 		   const void *in, size_t dlen);
+void *tap_push_uh6(struct udphdr *uh,
+		   const struct in6_addr *src, in_port_t sport,
+		   const struct in6_addr *dst, in_port_t dport,
+		   void *in, size_t dlen);
 void *tap_push_ip4h(struct iphdr *ip4h, struct in_addr src,
 		    struct in_addr dst, size_t l4len, uint8_t proto);
 void tap_udp4_send(const struct ctx *c, struct in_addr src, in_port_t sport,
-- 
2.48.1


^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [PATCH v5 4/4] udp: create and send ICMPv6 to local peer when applicable
  2025-02-23 22:59 [PATCH v5 0/4] Reconstruct ICMP headers for failed UDP connect Jon Maloy
                   ` (2 preceding siblings ...)
  2025-02-23 22:59 ` [PATCH v5 3/4] tap: break out building of udp header from tap_udp6_send function Jon Maloy
@ 2025-02-23 22:59 ` Jon Maloy
  2025-02-24  2:21   ` David Gibson
  3 siblings, 1 reply; 8+ messages in thread
From: Jon Maloy @ 2025-02-23 22:59 UTC (permalink / raw)
  To: passt-dev, sbrivio, lvivier, dgibson, jmaloy

When a local peer sends a UDP message to a non-existing port on an
existing remote host, that host will return an ICMP message containing
the error code ICMP_PORT_UNREACH, plus the header and the first eight
bytes of the original message. If the sender socket has been connected,
it uses this message to issue a "Connection Refused" event to the user.

Until now, we have only read such events from the externally facing
socket, but we don't forward them back to the local sender because
we cannot read the ICMP message directly to user space. Because of
this, the local peer will hang and wait for a response that never
arrives.

We now fix this for IPv6 by recreating and forwarding a correct ICMP
message back to the internal sender. We synthesize the message based
on the information in the extended error structure, plus the returned
part of the original message body.

Note that for the sake of completeness, we even produce ICMP messages
for other error codes. We have noticed that at least ICMP_PROT_UNREACH
is propagated as an error event back to the user.

Signed-off-by: Jon Maloy <jmaloy@redhat.com>
---
 tap.c |  8 ++++----
 tap.h |  4 ++++
 udp.c | 47 ++++++++++++++++++++++++++++++++++++++++++++++-
 3 files changed, 54 insertions(+), 5 deletions(-)

diff --git a/tap.c b/tap.c
index 380a7b9..31aec62 100644
--- a/tap.c
+++ b/tap.c
@@ -247,10 +247,10 @@ void tap_icmp4_send(const struct ctx *c, struct in_addr src, struct in_addr dst,
  *
  * Return: pointer at which to write the packet's payload
  */
-static void *tap_push_ip6h(struct ipv6hdr *ip6h,
-			   const struct in6_addr *src,
-			   const struct in6_addr *dst,
-			   size_t l4len, uint8_t proto, uint32_t flow)
+void *tap_push_ip6h(struct ipv6hdr *ip6h,
+		    const struct in6_addr *src,
+		    const struct in6_addr *dst,
+		    size_t l4len, uint8_t proto, uint32_t flow)
 {
 	ip6h->payload_len = htons(l4len);
 	ip6h->priority = 0;
diff --git a/tap.h b/tap.h
index b7b8cef..67aa9e6 100644
--- a/tap.h
+++ b/tap.h
@@ -53,6 +53,10 @@ void *tap_push_uh6(struct udphdr *uh,
 		   void *in, size_t dlen);
 void *tap_push_ip4h(struct iphdr *ip4h, struct in_addr src,
 		    struct in_addr dst, size_t l4len, uint8_t proto);
+void *tap_push_ip6h(struct ipv6hdr *ip6h,
+		    const struct in6_addr *src,
+		    const struct in6_addr *dst,
+		    size_t l4len, uint8_t proto, uint32_t flow);
 void tap_udp4_send(const struct ctx *c, struct in_addr src, in_port_t sport,
 		   struct in_addr dst, in_port_t dport,
 		   const void *in, size_t dlen);
diff --git a/udp.c b/udp.c
index 3dfd478..0e37fb3 100644
--- a/udp.c
+++ b/udp.c
@@ -88,6 +88,7 @@
 #include <netinet/ip.h>
 #include <netinet/udp.h>
 #include <netinet/ip_icmp.h>
+#include <netinet/icmp6.h>
 #include <stdint.h>
 #include <stddef.h>
 #include <string.h>
@@ -440,6 +441,46 @@ static void udp_send_conn_fail_icmp4(const struct ctx *c,
 	tap_icmp4_send(c, oaddr, eaddr, &msg, msglen);
 }
 
+
+/**
+ * udp_send_conn_fail_icmp6() - Construct and send ICMP to local peer
+ * @c:	          Execution context
+ * @ee:	  Extended error descriptor
+ * @ref:	  epoll reference
+ * @in:	  First bytes (max 8) of original UDP message body
+ * @dlen:	  Length of the read part of original UDP message body
+ * @flow:	  IPv6 flow identifier
+ */
+static void udp_send_conn_fail_icmp6(const struct ctx *c,
+				     const struct sock_extended_err *ee,
+				     const struct flowside *toside,
+				     void *in, size_t dlen, uint32_t flow)
+{
+	const struct in6_addr *oaddr = &toside->oaddr.a6;
+	const struct in6_addr *eaddr = &toside->eaddr.a6;
+	in_port_t eport = toside->eport;
+	in_port_t oport = toside->oport;
+	struct {
+		struct icmp6_hdr icmp6h;
+		struct ipv6hdr ip6h;
+		struct udphdr uh;
+		char data[8];
+	} __attribute__((packed, aligned(__alignof__(max_align_t)))) msg;
+	size_t msglen = sizeof(msg) - sizeof(msg.data) + dlen;
+	size_t l4len = dlen + sizeof(struct udphdr);
+
+	memset(&msg, 0, sizeof(msg));
+	msg.icmp6h.icmp6_type = ee->ee_type;
+	msg.icmp6h.icmp6_code = ee->ee_code;
+
+	/* Reconstruct the original headers as returned in the ICMP message */
+	tap_push_ip6h(&msg.ip6h, eaddr, oaddr, l4len, IPPROTO_UDP, flow);
+	tap_push_uh6(&msg.uh, eaddr, eport, oaddr, oport, in, dlen);
+	memcpy(&msg.data, in, dlen);
+
+	tap_icmp6_send(c, oaddr, eaddr, &msg, msglen);
+}
+
 /**
  * udp_sock_recverr() - Receive and clear an error from a socket
  * @c:	          Execution context
@@ -498,8 +539,12 @@ static int udp_sock_recverr(const struct ctx *c, union epoll_ref ref)
 	if (ref.type == EPOLL_TYPE_UDP_REPLY) {
 		flow_sidx_t sidx = flow_sidx_opposite(ref.flowside);
 		const struct flowside *toside = flowside_at_sidx(sidx);
+		uint32_t flow = sidx.flowi;
 
-		udp_send_conn_fail_icmp4(c, ee, toside, data, rc);
+		if (ee->ee_type == ICMP_DEST_UNREACH)
+			udp_send_conn_fail_icmp4(c, ee, toside, data, rc);
+		else if (ee->ee_type == ICMP6_DST_UNREACH)
+			udp_send_conn_fail_icmp6(c, ee, toside, data, rc, flow);
 	} else {
 		trace("Ignoring received cmsg on listener socket");
 	}
-- 
@@ -88,6 +88,7 @@
 #include <netinet/ip.h>
 #include <netinet/udp.h>
 #include <netinet/ip_icmp.h>
+#include <netinet/icmp6.h>
 #include <stdint.h>
 #include <stddef.h>
 #include <string.h>
@@ -440,6 +441,46 @@ static void udp_send_conn_fail_icmp4(const struct ctx *c,
 	tap_icmp4_send(c, oaddr, eaddr, &msg, msglen);
 }
 
+
+/**
+ * udp_send_conn_fail_icmp6() - Construct and send ICMP to local peer
+ * @c:	          Execution context
+ * @ee:	  Extended error descriptor
+ * @ref:	  epoll reference
+ * @in:	  First bytes (max 8) of original UDP message body
+ * @dlen:	  Length of the read part of original UDP message body
+ * @flow:	  IPv6 flow identifier
+ */
+static void udp_send_conn_fail_icmp6(const struct ctx *c,
+				     const struct sock_extended_err *ee,
+				     const struct flowside *toside,
+				     void *in, size_t dlen, uint32_t flow)
+{
+	const struct in6_addr *oaddr = &toside->oaddr.a6;
+	const struct in6_addr *eaddr = &toside->eaddr.a6;
+	in_port_t eport = toside->eport;
+	in_port_t oport = toside->oport;
+	struct {
+		struct icmp6_hdr icmp6h;
+		struct ipv6hdr ip6h;
+		struct udphdr uh;
+		char data[8];
+	} __attribute__((packed, aligned(__alignof__(max_align_t)))) msg;
+	size_t msglen = sizeof(msg) - sizeof(msg.data) + dlen;
+	size_t l4len = dlen + sizeof(struct udphdr);
+
+	memset(&msg, 0, sizeof(msg));
+	msg.icmp6h.icmp6_type = ee->ee_type;
+	msg.icmp6h.icmp6_code = ee->ee_code;
+
+	/* Reconstruct the original headers as returned in the ICMP message */
+	tap_push_ip6h(&msg.ip6h, eaddr, oaddr, l4len, IPPROTO_UDP, flow);
+	tap_push_uh6(&msg.uh, eaddr, eport, oaddr, oport, in, dlen);
+	memcpy(&msg.data, in, dlen);
+
+	tap_icmp6_send(c, oaddr, eaddr, &msg, msglen);
+}
+
 /**
  * udp_sock_recverr() - Receive and clear an error from a socket
  * @c:	          Execution context
@@ -498,8 +539,12 @@ static int udp_sock_recverr(const struct ctx *c, union epoll_ref ref)
 	if (ref.type == EPOLL_TYPE_UDP_REPLY) {
 		flow_sidx_t sidx = flow_sidx_opposite(ref.flowside);
 		const struct flowside *toside = flowside_at_sidx(sidx);
+		uint32_t flow = sidx.flowi;
 
-		udp_send_conn_fail_icmp4(c, ee, toside, data, rc);
+		if (ee->ee_type == ICMP_DEST_UNREACH)
+			udp_send_conn_fail_icmp4(c, ee, toside, data, rc);
+		else if (ee->ee_type == ICMP6_DST_UNREACH)
+			udp_send_conn_fail_icmp6(c, ee, toside, data, rc, flow);
 	} else {
 		trace("Ignoring received cmsg on listener socket");
 	}
-- 
2.48.1


^ permalink raw reply related	[flat|nested] 8+ messages in thread

* Re: [PATCH v5 2/4] udp: create and send ICMPv4 to local peer when applicable
  2025-02-23 22:59 ` [PATCH v5 2/4] udp: create and send ICMPv4 to local peer when applicable Jon Maloy
@ 2025-02-24  2:13   ` David Gibson
  0 siblings, 0 replies; 8+ messages in thread
From: David Gibson @ 2025-02-24  2:13 UTC (permalink / raw)
  To: Jon Maloy; +Cc: passt-dev, sbrivio, lvivier, dgibson

[-- Attachment #1: Type: text/plain, Size: 2009 bytes --]

On Sun, Feb 23, 2025 at 05:59:49PM -0500, Jon Maloy wrote:
> When a local peer sends a UDP message to a non-existing port on an
> existing remote host, that host will return an ICMP message containing
> the error code ICMP_PORT_UNREACH, plus the header and the first eight
> bytes of the original message. If the sender socket has been connected,
> it uses this message to issue a "Connection Refused" event to the user.
> 
> Until now, we have only read such events from the externally facing
> socket, but we don't forward them back to the local sender because
> we cannot read the ICMP message directly to user space. Because of
> this, the local peer will hang and wait for a response that never
> arrives.
> 
> We now fix this for IPv4 by recreating and forwarding a correct ICMP
> message back to the internal sender. We synthesize the message based
> on the information in the extended error structure, plus the returned
> part of the original message body.
> 
> Note that for the sake of completeness, we even produce ICMP messages
> for other error codes. We have noticed that at least ICMP_PROT_UNREACH
> is propagated as an error event back to the user.
> 
> Signed-off-by: Jon Maloy <jmaloy@redhat.com>

Reviewed-by: David Gibson <david@gibson.dropbear.id.au>

With the exception of a formatting nit:

> +/**
> + * udp_send_conn_fail_icmp4() - Construct and send ICMP to local peer
> + * @c:	          Execution context
> + * @ee:	  Extended error descriptor
> + * @ref:	  epoll reference
> + * @in:	  First bytes (max 8) of original UDP message body
> + * @dlen:	  Length of the read part of original UDP message body

Many of these function comments are inconsistent on whether the
parameter descriptions are separated by spaces or tabs (it should be
tabs).

-- 
David Gibson (he or they)	| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you, not the other way
				| around.
http://www.ozlabs.org/~dgibson

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH v5 3/4] tap: break out building of udp header from tap_udp6_send function
  2025-02-23 22:59 ` [PATCH v5 3/4] tap: break out building of udp header from tap_udp6_send function Jon Maloy
@ 2025-02-24  2:16   ` David Gibson
  0 siblings, 0 replies; 8+ messages in thread
From: David Gibson @ 2025-02-24  2:16 UTC (permalink / raw)
  To: Jon Maloy; +Cc: passt-dev, sbrivio, lvivier, dgibson

[-- Attachment #1: Type: text/plain, Size: 4075 bytes --]

On Sun, Feb 23, 2025 at 05:59:50PM -0500, Jon Maloy wrote:
> We will need to build the UDP header at other locations than in function
> tap_udp6_send(), so we break that part out to a separate function.
> 
> Signed-off-by: Jon Maloy <jmaloy@redhat.com>

Reviewed-by: David Gibson <david@gibson.dropbear.id.au>

...minus one nit.

> ---
>  tap.c | 40 ++++++++++++++++++++++++++++++----------
>  tap.h |  4 ++++
>  2 files changed, 34 insertions(+), 10 deletions(-)
> 
> diff --git a/tap.c b/tap.c
> index 3daba28..380a7b9 100644
> --- a/tap.c
> +++ b/tap.c
> @@ -266,7 +266,7 @@ static void *tap_push_ip6h(struct ipv6hdr *ip6h,
>  }
>  
>  /**
> - * tap_udp6_send() - Send UDP over IPv6 packet
> + * tap_push_uh6() - Build UDPv6 header with checksum
>   * @c:		Execution context
>   * @src:	IPv6 source address
>   * @sport:	UDP source port
> @@ -276,19 +276,14 @@ static void *tap_push_ip6h(struct ipv6hdr *ip6h,
>   * @in:		UDP payload contents (not including UDP header)
>   * @dlen:	UDP payload length (not including UDP header)
>   */
> -void tap_udp6_send(const struct ctx *c,
> +void *tap_push_uh6(struct udphdr *uh,
>  		   const struct in6_addr *src, in_port_t sport,
>  		   const struct in6_addr *dst, in_port_t dport,
> -		   uint32_t flow, void *in, size_t dlen)
> +		   void *in, size_t dlen)
>  {
>  	size_t l4len = dlen + sizeof(struct udphdr);
> -	char buf[USHRT_MAX];
> -	struct ipv6hdr *ip6h = tap_push_l2h(c, buf, ETH_P_IPV6);
> -	struct udphdr *uh = tap_push_ip6h(ip6h, src, dst,
> -					  l4len, IPPROTO_UDP, flow);
> -	char *data = (char *)(uh + 1);
>  	const struct iovec iov = {
> -		.iov_base = in,
> +		.iov_base = (void *)in,

'in' is already a void * so the cast is not necessary (even ignoring
the fact that you can generall coerce to and from void * without a
cast).

>  		.iov_len = dlen
>  	};
>  	struct iov_tail payload = IOV_TAIL(&iov, 1, 0);
> @@ -297,8 +292,33 @@ void tap_udp6_send(const struct ctx *c,
>  	uh->dest = htons(dport);
>  	uh->len = htons(l4len);
>  	csum_udp6(uh, src, dst, &payload);
> -	memcpy(data, in, dlen);
> +	return uh + 1;
> +}
> +
> +/**
> + * tap_udp6_send() - Send UDP over IPv6 packet
> + * @c:		Execution context
> + * @src:	IPv6 source address
> + * @sport:	UDP source port
> + * @dst:	IPv6 destination address
> + * @dport:	UDP destination port
> + * @flow:	Flow label
> + * @in:		UDP payload contents (not including UDP header)
> + * @dlen:	UDP payload length (not including UDP header)
> + */
> +void tap_udp6_send(const struct ctx *c,
> +		   const struct in6_addr *src, in_port_t sport,
> +		   const struct in6_addr *dst, in_port_t dport,
> +		   uint32_t flow, void *in, size_t dlen)
> +{
> +	size_t l4len = dlen + sizeof(struct udphdr);
> +	char buf[USHRT_MAX];
> +	struct ipv6hdr *ip6h = tap_push_l2h(c, buf, ETH_P_IPV6);
> +	struct udphdr *uh = tap_push_ip6h(ip6h, src, dst,
> +					  l4len, IPPROTO_UDP, flow);
> +	char *data = tap_push_uh6(uh, src, sport, dst, dport, in, dlen);
>  
> +	memcpy(data, in, dlen);
>  	tap_send_single(c, buf, dlen + (data - buf));
>  }
>  
> diff --git a/tap.h b/tap.h
> index 37da21d..b7b8cef 100644
> --- a/tap.h
> +++ b/tap.h
> @@ -47,6 +47,10 @@ static inline void tap_hdr_update(struct tap_hdr *thdr, size_t l2len)
>  void *tap_push_uh4(struct udphdr *uh, struct in_addr src, in_port_t sport,
>  		   struct in_addr dst, in_port_t dport,
>  		   const void *in, size_t dlen);
> +void *tap_push_uh6(struct udphdr *uh,
> +		   const struct in6_addr *src, in_port_t sport,
> +		   const struct in6_addr *dst, in_port_t dport,
> +		   void *in, size_t dlen);
>  void *tap_push_ip4h(struct iphdr *ip4h, struct in_addr src,
>  		    struct in_addr dst, size_t l4len, uint8_t proto);
>  void tap_udp4_send(const struct ctx *c, struct in_addr src, in_port_t sport,

-- 
David Gibson (he or they)	| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you, not the other way
				| around.
http://www.ozlabs.org/~dgibson

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH v5 4/4] udp: create and send ICMPv6 to local peer when applicable
  2025-02-23 22:59 ` [PATCH v5 4/4] udp: create and send ICMPv6 to local peer when applicable Jon Maloy
@ 2025-02-24  2:21   ` David Gibson
  0 siblings, 0 replies; 8+ messages in thread
From: David Gibson @ 2025-02-24  2:21 UTC (permalink / raw)
  To: Jon Maloy; +Cc: passt-dev, sbrivio, lvivier, dgibson

[-- Attachment #1: Type: text/plain, Size: 6297 bytes --]

On Sun, Feb 23, 2025 at 05:59:51PM -0500, Jon Maloy wrote:
> When a local peer sends a UDP message to a non-existing port on an
> existing remote host, that host will return an ICMP message containing
> the error code ICMP_PORT_UNREACH, plus the header and the first eight
> bytes of the original message. If the sender socket has been connected,
> it uses this message to issue a "Connection Refused" event to the user.
> 
> Until now, we have only read such events from the externally facing
> socket, but we don't forward them back to the local sender because
> we cannot read the ICMP message directly to user space. Because of
> this, the local peer will hang and wait for a response that never
> arrives.
> 
> We now fix this for IPv6 by recreating and forwarding a correct ICMP
> message back to the internal sender. We synthesize the message based
> on the information in the extended error structure, plus the returned
> part of the original message body.
> 
> Note that for the sake of completeness, we even produce ICMP messages
> for other error codes. We have noticed that at least ICMP_PROT_UNREACH
> is propagated as an error event back to the user.
> 
> Signed-off-by: Jon Maloy <jmaloy@redhat.com>
> ---
>  tap.c |  8 ++++----
>  tap.h |  4 ++++
>  udp.c | 47 ++++++++++++++++++++++++++++++++++++++++++++++-
>  3 files changed, 54 insertions(+), 5 deletions(-)
> 
> diff --git a/tap.c b/tap.c
> index 380a7b9..31aec62 100644
> --- a/tap.c
> +++ b/tap.c
> @@ -247,10 +247,10 @@ void tap_icmp4_send(const struct ctx *c, struct in_addr src, struct in_addr dst,
>   *
>   * Return: pointer at which to write the packet's payload
>   */
> -static void *tap_push_ip6h(struct ipv6hdr *ip6h,
> -			   const struct in6_addr *src,
> -			   const struct in6_addr *dst,
> -			   size_t l4len, uint8_t proto, uint32_t flow)
> +void *tap_push_ip6h(struct ipv6hdr *ip6h,
> +		    const struct in6_addr *src,
> +		    const struct in6_addr *dst,
> +		    size_t l4len, uint8_t proto, uint32_t flow)
>  {
>  	ip6h->payload_len = htons(l4len);
>  	ip6h->priority = 0;
> diff --git a/tap.h b/tap.h
> index b7b8cef..67aa9e6 100644
> --- a/tap.h
> +++ b/tap.h
> @@ -53,6 +53,10 @@ void *tap_push_uh6(struct udphdr *uh,
>  		   void *in, size_t dlen);
>  void *tap_push_ip4h(struct iphdr *ip4h, struct in_addr src,
>  		    struct in_addr dst, size_t l4len, uint8_t proto);
> +void *tap_push_ip6h(struct ipv6hdr *ip6h,
> +		    const struct in6_addr *src,
> +		    const struct in6_addr *dst,
> +		    size_t l4len, uint8_t proto, uint32_t flow);
>  void tap_udp4_send(const struct ctx *c, struct in_addr src, in_port_t sport,
>  		   struct in_addr dst, in_port_t dport,
>  		   const void *in, size_t dlen);
> diff --git a/udp.c b/udp.c
> index 3dfd478..0e37fb3 100644
> --- a/udp.c
> +++ b/udp.c
> @@ -88,6 +88,7 @@
>  #include <netinet/ip.h>
>  #include <netinet/udp.h>
>  #include <netinet/ip_icmp.h>
> +#include <netinet/icmp6.h>
>  #include <stdint.h>
>  #include <stddef.h>
>  #include <string.h>
> @@ -440,6 +441,46 @@ static void udp_send_conn_fail_icmp4(const struct ctx *c,
>  	tap_icmp4_send(c, oaddr, eaddr, &msg, msglen);
>  }
>  
> +
> +/**
> + * udp_send_conn_fail_icmp6() - Construct and send ICMP to local peer

s/ICMP/ICMPv6/

> + * @c:	          Execution context
> + * @ee:	  Extended error descriptor
> + * @ref:	  epoll reference
> + * @in:	  First bytes (max 8) of original UDP message body
> + * @dlen:	  Length of the read part of original UDP message body
> + * @flow:	  IPv6 flow identifier

As with 2/4 a bunch of these function comments seem to use
inconsistent tabs/spaces before the parameter descriptions.

> + */
> +static void udp_send_conn_fail_icmp6(const struct ctx *c,
> +				     const struct sock_extended_err *ee,
> +				     const struct flowside *toside,
> +				     void *in, size_t dlen, uint32_t flow)
> +{
> +	const struct in6_addr *oaddr = &toside->oaddr.a6;
> +	const struct in6_addr *eaddr = &toside->eaddr.a6;
> +	in_port_t eport = toside->eport;
> +	in_port_t oport = toside->oport;
> +	struct {
> +		struct icmp6_hdr icmp6h;
> +		struct ipv6hdr ip6h;
> +		struct udphdr uh;
> +		char data[8];
> +	} __attribute__((packed, aligned(__alignof__(max_align_t)))) msg;
> +	size_t msglen = sizeof(msg) - sizeof(msg.data) + dlen;
> +	size_t l4len = dlen + sizeof(struct udphdr);
> +
> +	memset(&msg, 0, sizeof(msg));
> +	msg.icmp6h.icmp6_type = ee->ee_type;
> +	msg.icmp6h.icmp6_code = ee->ee_code;
> +
> +	/* Reconstruct the original headers as returned in the ICMP message */
> +	tap_push_ip6h(&msg.ip6h, eaddr, oaddr, l4len, IPPROTO_UDP, flow);
> +	tap_push_uh6(&msg.uh, eaddr, eport, oaddr, oport, in, dlen);
> +	memcpy(&msg.data, in, dlen);
> +
> +	tap_icmp6_send(c, oaddr, eaddr, &msg, msglen);
> +}
> +
>  /**
>   * udp_sock_recverr() - Receive and clear an error from a socket
>   * @c:	          Execution context
> @@ -498,8 +539,12 @@ static int udp_sock_recverr(const struct ctx *c, union epoll_ref ref)
>  	if (ref.type == EPOLL_TYPE_UDP_REPLY) {
>  		flow_sidx_t sidx = flow_sidx_opposite(ref.flowside);
>  		const struct flowside *toside = flowside_at_sidx(sidx);
> +		uint32_t flow = sidx.flowi;
>  
> -		udp_send_conn_fail_icmp4(c, ee, toside, data, rc);
> +		if (ee->ee_type == ICMP_DEST_UNREACH)
> +			udp_send_conn_fail_icmp4(c, ee, toside, data, rc);
> +		else if (ee->ee_type == ICMP6_DST_UNREACH)
> +			udp_send_conn_fail_icmp6(c, ee, toside, data, rc, flow);

Ah... there's another little wrinkle I didn't consider looking at 2/4.
This will work or now, but at some point we want to allow passt to be
configured to NAT between IPv4 and IPv6.  That means that we shouldn't
base whether we sent an ICMP or ICMPv6 to the guest on whether we
received an ICMP or ICMPv6 on the socket side.  Instead we should look
at whether the guest is using IPv6 for this flow, so using inany_v4()
the addresses in toside.

>  	} else {
>  		trace("Ignoring received cmsg on listener socket");
>  	}

-- 
David Gibson (he or they)	| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you, not the other way
				| around.
http://www.ozlabs.org/~dgibson

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2025-02-24  2:21 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2025-02-23 22:59 [PATCH v5 0/4] Reconstruct ICMP headers for failed UDP connect Jon Maloy
2025-02-23 22:59 ` [PATCH v5 1/4] tap: break out building of udp header from tap_udp4_send function Jon Maloy
2025-02-23 22:59 ` [PATCH v5 2/4] udp: create and send ICMPv4 to local peer when applicable Jon Maloy
2025-02-24  2:13   ` David Gibson
2025-02-23 22:59 ` [PATCH v5 3/4] tap: break out building of udp header from tap_udp6_send function Jon Maloy
2025-02-24  2:16   ` David Gibson
2025-02-23 22:59 ` [PATCH v5 4/4] udp: create and send ICMPv6 to local peer when applicable Jon Maloy
2025-02-24  2:21   ` David Gibson

Code repositories for project(s) associated with this public inbox

	https://passt.top/passt

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for IMAP folder(s).