From: Jon Maloy <jmaloy@redhat.com>
To: passt-dev@passt.top, sbrivio@redhat.com, lvivier@redhat.com,
dgibson@redhat.com, jmaloy@redhat.com
Subject: [PATCH v8 4/4] udp: create and send ICMPv6 to local peer when applicable
Date: Fri, 28 Feb 2025 17:41:21 -0500 [thread overview]
Message-ID: <20250228224121.815201-5-jmaloy@redhat.com> (raw)
In-Reply-To: <20250228224121.815201-1-jmaloy@redhat.com>
When a local peer sends a UDP message to a non-existing port on an
existing remote host, that host will return an ICMPv6 message containing
the error code ICMP6_DST_UNREACH_NOPORT, plus the IPv6 header, UDP header
and the first 1232 bytes of the original message, if any. If the sender
socket has been connected, it uses this message to issue a
"Connection Refused" event to the user.
Until now, we have only read such events from the externally facing
socket, but we have not forwarded them back to the local sender because
we cannot read the ICMP message directly into user space. Because of
this, the local peer will hang and wait for a response that never
arrives.
We now fix this for IPv6 by recreating and forwarding a correct ICMP
message back to the internal sender. We synthesize the message based
on the information in the extended error structure, plus the returned
part of the original message body.
Note that for the sake of completeness, we even produce ICMP messages
for other error types and codes. We have noticed that at least
ICMP_PROT_UNREACH is propagated as an error event back to the user.
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: Jon Maloy <jmaloy@redhat.com>
---
v2: - Handling all ICMP types and codes
- Returning up to 1232 bytes of user data as per RFC 4884.
Both suggested by anonymous PASST user.
v3: - Added ASSERT() in the ICMPv6 message creation function.
Suggested by David Gibson.
---
tap.c | 8 ++++----
tap.h | 4 ++++
udp.c | 60 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++---
3 files changed, 65 insertions(+), 7 deletions(-)
diff --git a/tap.c b/tap.c
index 8aa47bd..7e4bc00 100644
--- a/tap.c
+++ b/tap.c
@@ -247,10 +247,10 @@ void tap_icmp4_send(const struct ctx *c, struct in_addr src, struct in_addr dst,
*
* Return: pointer at which to write the packet's payload
*/
-static void *tap_push_ip6h(struct ipv6hdr *ip6h,
- const struct in6_addr *src,
- const struct in6_addr *dst,
- size_t l4len, uint8_t proto, uint32_t flow)
+void *tap_push_ip6h(struct ipv6hdr *ip6h,
+ const struct in6_addr *src,
+ const struct in6_addr *dst,
+ size_t l4len, uint8_t proto, uint32_t flow)
{
ip6h->payload_len = htons(l4len);
ip6h->priority = 0;
diff --git a/tap.h b/tap.h
index b7b8cef..67aa9e6 100644
--- a/tap.h
+++ b/tap.h
@@ -53,6 +53,10 @@ void *tap_push_uh6(struct udphdr *uh,
void *in, size_t dlen);
void *tap_push_ip4h(struct iphdr *ip4h, struct in_addr src,
struct in_addr dst, size_t l4len, uint8_t proto);
+void *tap_push_ip6h(struct ipv6hdr *ip6h,
+ const struct in6_addr *src,
+ const struct in6_addr *dst,
+ size_t l4len, uint8_t proto, uint32_t flow);
void tap_udp4_send(const struct ctx *c, struct in_addr src, in_port_t sport,
struct in_addr dst, in_port_t dport,
const void *in, size_t dlen);
diff --git a/udp.c b/udp.c
index 8b53475..3e17fda 100644
--- a/udp.c
+++ b/udp.c
@@ -88,6 +88,7 @@
#include <netinet/ip.h>
#include <netinet/udp.h>
#include <netinet/ip_icmp.h>
+#include <netinet/icmp6.h>
#include <stdint.h>
#include <stddef.h>
#include <string.h>
@@ -115,6 +116,9 @@
/* Maximum UDP data to be returned in ICMP messages */
#define ICMP4_MAX_DLEN 8
+#define ICMP6_MAX_DLEN (IPV6_MIN_MTU \
+ - sizeof(struct udphdr) \
+ - sizeof(struct ipv6hdr))
/* "Spliced" sockets indexed by bound port (host order) */
static int udp_splice_ns [IP_VERSIONS][NUM_PORTS];
@@ -446,6 +450,49 @@ static void udp_send_conn_fail_icmp4(const struct ctx *c,
tap_icmp4_send(c, oaddr, eaddr, &msg, msglen);
}
+
+/**
+ * udp_send_conn_fail_icmp6() - Construct and send ICMPv6 to local peer
+ * @c: Execution context
+ * @ee: Extended error descriptor
+ * @toside: Flowside providing the addresses and ports for the rebuilt headers
+ * @in: First bytes (max 1232) of original UDP message body
+ * @dlen: Length of the read part of original UDP message body
+ * @flow: IPv6 flow identifier
+ */
+static void udp_send_conn_fail_icmp6(const struct ctx *c,
+ const struct sock_extended_err *ee,
+ const struct flowside *toside,
+ void *in, size_t dlen, uint32_t flow)
+{
+ const struct in6_addr *oaddr = &toside->oaddr.a6;
+ const struct in6_addr *eaddr = &toside->eaddr.a6;
+ in_port_t eport = toside->eport;
+ in_port_t oport = toside->oport;
+ struct {
+ struct icmp6_hdr icmp6h;
+ struct ipv6hdr ip6h;
+ struct udphdr uh;
+ char data[ICMP6_MAX_DLEN];
+ } __attribute__((packed, aligned(__alignof__(max_align_t)))) msg;
+ size_t msglen = sizeof(msg) - sizeof(msg.data) + dlen;
+ size_t l4len = dlen + sizeof(struct udphdr);
+
+ ASSERT(dlen <= ICMP6_MAX_DLEN);
+ memset(&msg, 0, sizeof(msg));
+ msg.icmp6h.icmp6_type = ee->ee_type;
+ msg.icmp6h.icmp6_code = ee->ee_code;
+ if (ee->ee_type == ICMP6_PACKET_TOO_BIG)
+ msg.icmp6h.icmp6_dataun.icmp6_un_data32[0] = htonl(ee->ee_info);
+
+ /* Reconstruct the original headers as returned in the ICMP message */
+ tap_push_ip6h(&msg.ip6h, eaddr, oaddr, l4len, IPPROTO_UDP, flow);
+ tap_push_uh6(&msg.uh, eaddr, eport, oaddr, oport, in, dlen);
+ memcpy(&msg.data, in, dlen);
+
+ tap_icmp6_send(c, oaddr, eaddr, &msg, msglen);
+}
+
/**
* udp_sock_recverr() - Receive and clear an error from a socket
* @c: Execution context
@@ -461,7 +508,7 @@ static int udp_sock_recverr(const struct ctx *c, union epoll_ref ref)
const struct sock_extended_err *ee;
const struct cmsghdr *hdr;
char buf[CMSG_SPACE(sizeof(*ee))];
- char data[ICMP4_MAX_DLEN];
+ char data[ICMP6_MAX_DLEN];
int s = ref.fd;
struct iovec iov = {
.iov_base = data,
@@ -504,8 +551,15 @@ static int udp_sock_recverr(const struct ctx *c, union epoll_ref ref)
if (ref.type == EPOLL_TYPE_UDP_REPLY) {
flow_sidx_t sidx = flow_sidx_opposite(ref.flowside);
const struct flowside *toside = flowside_at_sidx(sidx);
-
- udp_send_conn_fail_icmp4(c, ee, toside, data, rc);
+ size_t dlen = rc;
+
+ if (hdr->cmsg_level == IPPROTO_IP) {
+ dlen = MIN(dlen, ICMP4_MAX_DLEN);
+ udp_send_conn_fail_icmp4(c, ee, toside, data, dlen);
+ } else if (hdr->cmsg_level == IPPROTO_IPV6) {
+ udp_send_conn_fail_icmp6(c, ee, toside, data,
+ dlen, sidx.flowi);
+ }
} else {
trace("Ignoring received IP_RECVERR cmsg on listener socket");
}
--
@@ -88,6 +88,7 @@
#include <netinet/ip.h>
#include <netinet/udp.h>
#include <netinet/ip_icmp.h>
+#include <netinet/icmp6.h>
#include <stdint.h>
#include <stddef.h>
#include <string.h>
@@ -115,6 +116,9 @@
/* Maximum UDP data to be returned in ICMP messages */
#define ICMP4_MAX_DLEN 8
+#define ICMP6_MAX_DLEN (IPV6_MIN_MTU \
+ - sizeof(struct udphdr) \
+ - sizeof(struct ipv6hdr))
/* "Spliced" sockets indexed by bound port (host order) */
static int udp_splice_ns [IP_VERSIONS][NUM_PORTS];
@@ -446,6 +450,49 @@ static void udp_send_conn_fail_icmp4(const struct ctx *c,
tap_icmp4_send(c, oaddr, eaddr, &msg, msglen);
}
+
+/**
+ * udp_send_conn_fail_icmp6() - Construct and send ICMPv6 to local peer
+ * @c: Execution context
+ * @ee: Extended error descriptor
+ * @toside: Flowside providing the addresses and ports for the rebuilt headers
+ * @in: First bytes (max 1232) of original UDP message body
+ * @dlen: Length of the read part of original UDP message body
+ * @flow: IPv6 flow identifier
+ */
+static void udp_send_conn_fail_icmp6(const struct ctx *c,
+ const struct sock_extended_err *ee,
+ const struct flowside *toside,
+ void *in, size_t dlen, uint32_t flow)
+{
+ const struct in6_addr *oaddr = &toside->oaddr.a6;
+ const struct in6_addr *eaddr = &toside->eaddr.a6;
+ in_port_t eport = toside->eport;
+ in_port_t oport = toside->oport;
+ struct {
+ struct icmp6_hdr icmp6h;
+ struct ipv6hdr ip6h;
+ struct udphdr uh;
+ char data[ICMP6_MAX_DLEN];
+ } __attribute__((packed, aligned(__alignof__(max_align_t)))) msg;
+ size_t msglen = sizeof(msg) - sizeof(msg.data) + dlen;
+ size_t l4len = dlen + sizeof(struct udphdr);
+
+ ASSERT(dlen <= ICMP6_MAX_DLEN);
+ memset(&msg, 0, sizeof(msg));
+ msg.icmp6h.icmp6_type = ee->ee_type;
+ msg.icmp6h.icmp6_code = ee->ee_code;
+ if (ee->ee_type == ICMP6_PACKET_TOO_BIG)
+ msg.icmp6h.icmp6_dataun.icmp6_un_data32[0] = htonl(ee->ee_info);
+
+ /* Reconstruct the original headers as returned in the ICMP message */
+ tap_push_ip6h(&msg.ip6h, eaddr, oaddr, l4len, IPPROTO_UDP, flow);
+ tap_push_uh6(&msg.uh, eaddr, eport, oaddr, oport, in, dlen);
+ memcpy(&msg.data, in, dlen);
+
+ tap_icmp6_send(c, oaddr, eaddr, &msg, msglen);
+}
+
/**
* udp_sock_recverr() - Receive and clear an error from a socket
* @c: Execution context
@@ -461,7 +508,7 @@ static int udp_sock_recverr(const struct ctx *c, union epoll_ref ref)
const struct sock_extended_err *ee;
const struct cmsghdr *hdr;
char buf[CMSG_SPACE(sizeof(*ee))];
- char data[ICMP4_MAX_DLEN];
+ char data[ICMP6_MAX_DLEN];
int s = ref.fd;
struct iovec iov = {
.iov_base = data,
@@ -504,8 +551,15 @@ static int udp_sock_recverr(const struct ctx *c, union epoll_ref ref)
if (ref.type == EPOLL_TYPE_UDP_REPLY) {
flow_sidx_t sidx = flow_sidx_opposite(ref.flowside);
const struct flowside *toside = flowside_at_sidx(sidx);
-
- udp_send_conn_fail_icmp4(c, ee, toside, data, rc);
+ size_t dlen = rc;
+
+ if (hdr->cmsg_level == IPPROTO_IP) {
+ dlen = MIN(dlen, ICMP4_MAX_DLEN);
+ udp_send_conn_fail_icmp4(c, ee, toside, data, dlen);
+ } else if (hdr->cmsg_level == IPPROTO_IPV6) {
+ udp_send_conn_fail_icmp6(c, ee, toside, data,
+ dlen, sidx.flowi);
+ }
} else {
trace("Ignoring received IP_RECVERR cmsg on listener socket");
}
--
2.48.1
prev parent reply other threads:[~2025-02-28 22:41 UTC|newest]
Thread overview: 5+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-02-28 22:41 [PATCH v8 0/4] Reconstruct incoming ICMP headers for failed UDP connect and forward back Jon Maloy
2025-02-28 22:41 ` [PATCH v8 1/4] tap: break out building of udp header from tap_udp4_send function Jon Maloy
2025-02-28 22:41 ` [PATCH v8 2/4] udp: create and send ICMPv4 to local peer when applicable Jon Maloy
2025-02-28 22:41 ` [PATCH v8 3/4] tap: break out building of udp header from tap_udp6_send function Jon Maloy
2025-02-28 22:41 ` Jon Maloy [this message]
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20250228224121.815201-5-jmaloy@redhat.com \
--to=jmaloy@redhat.com \
--cc=dgibson@redhat.com \
--cc=lvivier@redhat.com \
--cc=passt-dev@passt.top \
--cc=sbrivio@redhat.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
Code repositories for project(s) associated with this public inbox
https://passt.top/passt
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for IMAP folder(s).