On Wed, Feb 19, 2025 at 02:30:07PM -0500, Jon Maloy wrote: > When a local peer sends a UDP message to a non-existing port on an > existing remote host, that host will return an ICMP message containing > the error code ICMP_PORT_UNREACH, plus the header and the first eight > bytes of the original message. If the sender socket has been connected, > it uses this message to issue a "Connection Refused" event to the user. > > Until now, we have only read such events from the externally facing > socket, but we don't forward them back to the local sender because > we cannot read the ICMP message directly to user space. Because of > this, the local peer will hang and wait for a response that never > arrives. > > We now fix this for IPv4 by recreating and forwarding a correct ICMP > message back to the internal sender. We synthesize the message based > on the information in the extended error structure, plus the returned > part of the original message body. > > Note that for the sake of completeness, we even produce ICMP messages > for other error codes. We have noticed that at least ICMP_PROT_UNREACH > is propagated as an error event back to the user. > > Signed-off-by: Jon Maloy Almost there... [snip] > ee = (const struct sock_extended_err *)CMSG_DATA(hdr); > - > - /* TODO: When possible propagate and otherwise handle errors */ > + if (ee->ee_type == ICMP_DEST_UNREACH) { > + flow_sidx_t sidx; > + struct udp_flow *flow; > + const struct flowside *toside; > + > + if (ref.type == EPOLL_TYPE_UDP_LISTEN) { > + sidx = flow_lookup_sa(c, IPPROTO_UDP, ref.udp.pif, > + &saddr, ref.udp.port); > + flow = udp_at_sidx(sidx); > + if (!flow) { > + err("Unexpected cmsg reading error queue"); > + return -1; This is too strong a response if you can't find a flow. This will happen if we get some random ICMP which doesn't match an existing flow. It could be a long delayed ICMP from some flow that already expired, some peer confused by something, or a malicious actor. This will log a hard error (without rate limiting) and break scanning for any additional errors. I think we want just a trace() and return 1; here (we did receive and process an error - but all we could do was drop it). > + } > + flow->ts = now->tv_sec; > + sidx = flow_sidx_opposite(sidx); > + } else { > + sidx = flow_sidx_opposite(ref.flowside); > + } > + toside = flowside_at_sidx(sidx); > + udp_send_conn_fail_icmp4(c, ee, toside, udp_data, rc); > + } > debug("%s error on UDP socket %i: %s", > str_ee_origin(ee), s, strerror_(ee->ee_errno)); > > @@ -461,15 +528,18 @@ static int udp_sock_recverr(int s) > /** > * udp_sock_errs() - Process errors on a socket > * @c: Execution context > - * @s: Socket to receive from > + * @ref: epoll reference > * @events: epoll events bitmap > + * @now: Current timestamp > * > * Return: Number of errors handled, or < 0 if we have an unrecoverable error > */ > -int udp_sock_errs(const struct ctx *c, int s, uint32_t events) > +int udp_sock_errs(const struct ctx *c, union epoll_ref ref, uint32_t events, > + const struct timespec *now) > { > unsigned n_err = 0; > socklen_t errlen; > + int s = ref.fd; > int rc, err; > > ASSERT(!c->no_udp); > @@ -478,7 +548,7 @@ int udp_sock_errs(const struct ctx *c, int s, uint32_t events) > return 0; /* Nothing to do */ > > /* Empty the error queue */ > - while ((rc = udp_sock_recverr(s)) > 0) > + while ((rc = udp_sock_recverr(c, ref, now)) > 0) > n_err += rc; > > if (rc < 0) > @@ -558,7 +628,7 @@ static void udp_buf_listen_sock_handler(const struct ctx *c, > const socklen_t sasize = sizeof(udp_meta[0].s_in); > int n, i; > > - if (udp_sock_errs(c, ref.fd, events) < 0) { > + if (udp_sock_errs(c, ref, events, now) < 0) { > err("UDP: Unrecoverable error on listening socket:" > " (%s port %hu)", pif_name(ref.udp.pif), ref.udp.port); > /* FIXME: what now? close/re-open socket? */ > @@ -661,7 +731,7 @@ static void udp_buf_reply_sock_handler(const struct ctx *c, union epoll_ref ref, > > from_s = uflow->s[ref.flowside.sidei]; > > - if (udp_sock_errs(c, from_s, events) < 0) { > + if (udp_sock_errs(c, ref, events, now) < 0) { > flow_err(uflow, "Unrecoverable error on reply socket"); > flow_err_details(uflow); > udp_flow_close(c, uflow); > diff --git a/udp_internal.h b/udp_internal.h > index cc80e30..c5f8304 100644 > --- a/udp_internal.h > +++ b/udp_internal.h > @@ -30,5 +30,6 @@ size_t udp_update_hdr4(struct iphdr *ip4h, struct udp_payload_t *bp, > size_t udp_update_hdr6(struct ipv6hdr *ip6h, struct udp_payload_t *bp, > const struct flowside *toside, size_t dlen, > bool no_udp_csum); > -int udp_sock_errs(const struct ctx *c, int s, uint32_t events); > +int udp_sock_errs(const struct ctx *c, union epoll_ref ref, uint32_t events, > + const struct timespec *now); > #endif /* UDP_INTERNAL_H */ > diff --git a/udp_vu.c b/udp_vu.c > index 4123510..0b1d3c6 100644 > --- a/udp_vu.c > +++ b/udp_vu.c > @@ -227,7 +227,7 @@ void udp_vu_listen_sock_handler(const struct ctx *c, union epoll_ref ref, > struct vu_virtq *vq = &vdev->vq[VHOST_USER_RX_QUEUE]; > int i; > > - if (udp_sock_errs(c, ref.fd, events) < 0) { > + if (udp_sock_errs(c, ref, events, now) < 0) { > err("UDP: Unrecoverable error on listening socket:" > " (%s port %hu)", pif_name(ref.udp.pif), ref.udp.port); > return; > @@ -302,7 +302,7 @@ void udp_vu_reply_sock_handler(const struct ctx *c, union epoll_ref ref, > > ASSERT(!c->no_udp); > > - if (udp_sock_errs(c, from_s, events) < 0) { > + if (udp_sock_errs(c, ref, events, now) < 0) { > flow_err(uflow, "Unrecoverable error on reply socket"); > flow_err_details(uflow); > udp_flow_close(c, uflow); -- David Gibson (he or they) | I'll have my music baroque, and my code david AT gibson.dropbear.id.au | minimalist, thank you, not the other way | around. http://www.ozlabs.org/~dgibson