On Thu, Feb 27, 2025 at 04:35:16PM -0500, Jon Maloy wrote: > When a local peer sends a UDP message to a non-existing port on an > existing remote host, that host will return an ICMP message containing > the error code ICMP_PORT_UNREACH, plus the header and the first eight > bytes of the original message. If the sender socket has been connected, > it uses this message to issue a "Connection Refused" event to the user. > > Until now, we have only read such events from the externally facing > socket, but we don't forward them back to the local sender because > we cannot read the ICMP message directly to user space. Because of > this, the local peer will hang and wait for a response that never > arrives. > > We now fix this for IPv4 by recreating and forwarding a correct ICMP > message back to the internal sender. We synthesize the message based > on the information in the extended error structure, plus the returned > part of the original message body. > > Note that for the sake of completeness, we even produce ICMP messages > for other error codes. We have noticed that at least ICMP_PROT_UNREACH > is propagated as an error event back to the user. > > Reviewed-by: David Gibson > Signed-off-by: Jon Maloy I know I reviewed this already, but I did spot a couple of extra things with the context of the later patches. [snip] > +/** > + * udp_send_conn_fail_icmp4() - Construct and send ICMPv4 to local peer > + * @c: Execution context > + * @ee: Extended error descriptor > + * @ref: epoll reference > + * @in: First bytes (max 8) of original UDP message body > + * @dlen: Length of the read part of original UDP message body > + */ > +static void udp_send_conn_fail_icmp4(const struct ctx *c, > + const struct sock_extended_err *ee, > + const struct flowside *toside, > + void *in, size_t dlen) > +{ > + struct in_addr oaddr = toside->oaddr.v4mapped.a4; > + struct in_addr eaddr = toside->eaddr.v4mapped.a4; > + in_port_t eport = toside->eport; > + in_port_t oport = toside->oport; > + struct { > + struct icmphdr icmp4h; > + struct iphdr ip4h; > + struct udphdr uh; > + char data[ICMP4_MAX_DLEN]; > + } __attribute__((packed, aligned(__alignof__(max_align_t)))) msg; > + size_t msglen = sizeof(msg) - sizeof(msg.data) + dlen; > + Having an ASSERT(dlen < ICMP_MAX_DLEN) would make me (and maybe some static checkers) a bit less nervous :). > + memset(&msg, 0, sizeof(msg)); > + msg.icmp4h.type = ee->ee_type; > + msg.icmp4h.code = ee->ee_code; Do you need any extra info here in the case of an ICMP Would Fragment message, as you do for the sort of equivalent ICMPv6 Packet Too Big? Not that I consider supporting MTU discovery essential in this round of patches, but it would be a little odd to implement it for IPv6 but not IPv4. [snip] > - /* TODO: When possible propagate and otherwise handle errors */ > + udp_send_conn_fail_icmp4(c, ee, toside, data, rc); > + } else { > + trace("Ignoring received cmsg on listener socket"); Nit: I think you can be more specific that this is an IP_RECVERR cmsg. > + } > debug("%s error on UDP socket %i: %s", > str_ee_origin(ee), s, strerror_(ee->ee_errno)); > > @@ -461,15 +515,16 @@ static int udp_sock_recverr(int s) > /** > * udp_sock_errs() - Process errors on a socket > * @c: Execution context > - * @s: Socket to receive from > + * @ref: epoll reference > * @events: epoll events bitmap > * > * Return: Number of errors handled, or < 0 if we have an unrecoverable error > */ > -int udp_sock_errs(const struct ctx *c, int s, uint32_t events) > +int udp_sock_errs(const struct ctx *c, union epoll_ref ref, uint32_t events) > { > unsigned n_err = 0; > socklen_t errlen; > + int s = ref.fd; > int rc, err; > > ASSERT(!c->no_udp); > @@ -478,7 +533,7 @@ int udp_sock_errs(const struct ctx *c, int s, uint32_t events) > return 0; /* Nothing to do */ > > /* Empty the error queue */ > - while ((rc = udp_sock_recverr(s)) > 0) > + while ((rc = udp_sock_recverr(c, ref)) > 0) > n_err += rc; > > if (rc < 0) > @@ -510,7 +565,7 @@ int udp_sock_errs(const struct ctx *c, int s, uint32_t events) > * @c: Execution context > * @s: Socket to receive from > * @events: epoll events bitmap > - * @mmh mmsghdr array to receive into > + * @mmh mmsghdr array to receive into This appears to be a spurious whitespace change. > * > * Return: Number of datagrams received > * > @@ -558,7 +613,7 @@ static void udp_buf_listen_sock_handler(const struct ctx *c, > const socklen_t sasize = sizeof(udp_meta[0].s_in); > int n, i; > > - if (udp_sock_errs(c, ref.fd, events) < 0) { > + if (udp_sock_errs(c, ref, events) < 0) { > err("UDP: Unrecoverable error on listening socket:" > " (%s port %hu)", pif_name(ref.udp.pif), ref.udp.port); > /* FIXME: what now? close/re-open socket? */ > @@ -661,7 +716,7 @@ static void udp_buf_reply_sock_handler(const struct ctx *c, union epoll_ref ref, > > from_s = uflow->s[ref.flowside.sidei]; > > - if (udp_sock_errs(c, from_s, events) < 0) { > + if (udp_sock_errs(c, ref, events) < 0) { > flow_err(uflow, "Unrecoverable error on reply socket"); > flow_err_details(uflow); > udp_flow_close(c, uflow); > diff --git a/udp_internal.h b/udp_internal.h > index cc80e30..3b081f5 100644 > --- a/udp_internal.h > +++ b/udp_internal.h > @@ -30,5 +30,5 @@ size_t udp_update_hdr4(struct iphdr *ip4h, struct udp_payload_t *bp, > size_t udp_update_hdr6(struct ipv6hdr *ip6h, struct udp_payload_t *bp, > const struct flowside *toside, size_t dlen, > bool no_udp_csum); > -int udp_sock_errs(const struct ctx *c, int s, uint32_t events); > +int udp_sock_errs(const struct ctx *c, union epoll_ref ref, uint32_t events); > #endif /* UDP_INTERNAL_H */ > diff --git a/udp_vu.c b/udp_vu.c > index 4123510..c26a223 100644 > --- a/udp_vu.c > +++ b/udp_vu.c > @@ -227,7 +227,7 @@ void udp_vu_listen_sock_handler(const struct ctx *c, union epoll_ref ref, > struct vu_virtq *vq = &vdev->vq[VHOST_USER_RX_QUEUE]; > int i; > > - if (udp_sock_errs(c, ref.fd, events) < 0) { > + if (udp_sock_errs(c, ref, events) < 0) { > err("UDP: Unrecoverable error on listening socket:" > " (%s port %hu)", pif_name(ref.udp.pif), ref.udp.port); > return; > @@ -302,7 +302,7 @@ void udp_vu_reply_sock_handler(const struct ctx *c, union epoll_ref ref, > > ASSERT(!c->no_udp); > > - if (udp_sock_errs(c, from_s, events) < 0) { > + if (udp_sock_errs(c, ref, events) < 0) { > flow_err(uflow, "Unrecoverable error on reply socket"); > flow_err_details(uflow); > udp_flow_close(c, uflow); -- David Gibson (he or they) | I'll have my music baroque, and my code david AT gibson.dropbear.id.au | minimalist, thank you, not the other way | around. http://www.ozlabs.org/~dgibson