On Sat, Mar 15, 2025 at 11:32:44AM -0400, Jon Maloy wrote: > While developing traceroute forwarding tap-to-sock we found that > struct msghdr.msg_name for the ICMPs in the opposite direction always > contains the destination address of the original UDP message, and not, > as one might expect, the one of the host which created the error message. > > Study of the kernel code reveals that this address instead is appended > as extra data after the received struct sock_extended_err area. > > We now change the ICMP receive code accordingly. > > Fixes: 55431f0077b6 ("udp: create and send ICMPv4 to local peer when applicable") > Fixes: 68b04182e07d ("udp: create and send ICMPv6 to local peer when applicable") > > Signed-off-by: Jon Maloy The idea looks good, a few minor warts in the implementation. > --- > udp.c | 26 +++++++++++++++----------- > 1 file changed, 15 insertions(+), 11 deletions(-) > > diff --git a/udp.c b/udp.c > index 80520cb..271e570 100644 > --- a/udp.c > +++ b/udp.c > @@ -510,10 +510,13 @@ static void udp_send_conn_fail_icmp6(const struct ctx *c, > */ > static int udp_sock_recverr(const struct ctx *c, union epoll_ref ref) > { > - const struct sock_extended_err *ee; > + struct errhdr { > + struct sock_extended_err ee; > + union sockaddr_inany saddr; > + }; > + const struct errhdr *eh; > const struct cmsghdr *hdr; > - union sockaddr_inany saddr; > - char buf[CMSG_SPACE(sizeof(*ee))]; > + char buf[CMSG_SPACE(sizeof(struct errhdr))]; > char data[ICMP6_MAX_DLEN]; > int s = ref.fd; > struct iovec iov = { > @@ -521,11 +524,11 @@ static int udp_sock_recverr(const struct ctx *c, union epoll_ref ref) > .iov_len = sizeof(data) > }; > struct msghdr mh = { > - .msg_name = &saddr, > - .msg_namelen = sizeof(saddr), > + .msg_name = 0, > + .msg_namelen = 0, I think you can just omit these and they'll be zero initialized. > .msg_iov = &iov, > .msg_iovlen = 1, > - .msg_control = buf, > + .msg_control = buf,//(void *)&errhdr, Looks like you have a leftover comment here. > .msg_controllen = sizeof(buf), > }; > ssize_t rc; > @@ -553,7 +556,7 @@ static int udp_sock_recverr(const struct ctx *c, union epoll_ref ref) > return -1; > } > > - ee = (const struct sock_extended_err *)CMSG_DATA(hdr); > + eh = (const struct errhdr *)CMSG_DATA(hdr); > if (ref.type == EPOLL_TYPE_UDP_REPLY) { > flow_sidx_t sidx = flow_sidx_opposite(ref.flowside); > const struct flowside *toside = flowside_at_sidx(sidx); > @@ -561,18 +564,19 @@ static int udp_sock_recverr(const struct ctx *c, union epoll_ref ref) > > if (hdr->cmsg_level == IPPROTO_IP) { > dlen = MIN(dlen, ICMP4_MAX_DLEN); > - udp_send_conn_fail_icmp4(c, ee, toside, saddr.sa4.sin_addr, > + udp_send_conn_fail_icmp4(c, &eh->ee, toside, > + eh->saddr.sa4.sin_addr, > data, dlen); So, it looks from man ip(7) that you're supposed to use the SO_EE_OFFENDER() macro to get the socket address, rather than just assuming it's immediately after the struct sock_extended_err. Except... without assuming that, I can't really see how you're supposed to properly size the buffer > } else if (hdr->cmsg_level == IPPROTO_IPV6) { > - udp_send_conn_fail_icmp6(c, ee, toside, > - &saddr.sa6.sin6_addr, > + udp_send_conn_fail_icmp6(c, &eh->ee, toside, > + &eh->saddr.sa6.sin6_addr, > data, dlen, sidx.flowi); > } > } else { > trace("Ignoring received IP_RECVERR cmsg on listener socket"); > } > debug("%s error on UDP socket %i: %s", > - str_ee_origin(ee), s, strerror_(ee->ee_errno)); > + str_ee_origin(&eh->ee), s, strerror_(eh->ee.ee_errno)); > > return 1; > } -- David Gibson (he or they) | I'll have my music baroque, and my code david AT gibson.dropbear.id.au | minimalist, thank you, not the other way | around. http://www.ozlabs.org/~dgibson