// SPDX-License-Identifier: GPL-2.0-or-later /* PASST - Plug A Simple Socket Transport * for qemu/UNIX domain socket mode * * PASTA - Pack A Subtle Tap Abstraction * for network namespace/tap device mode * * icmp.c - ICMP/ICMPv6 echo proxy * * Copyright (c) 2021 Red Hat GmbH * Author: Stefano Brivio */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "packet.h" #include "util.h" #include "passt.h" #include "tap.h" #include "log.h" #include "icmp.h" #include "flow_table.h" #define ICMP_ECHO_TIMEOUT 60 /* s, timeout for ICMP socket activity */ #define ICMP_NUM_IDS (1U << 16) /* Sides of a flow as we use them for ping streams */ #define SOCKSIDE 0 #define TAPSIDE 1 #define PINGF(idx) (&(FLOW(idx)->ping)) #define TAPFSIDE(pingf) (&(pingf)->f.side[TAPSIDE]) #define SOCKFSIDE(pingf) (&(pingf)->f.side[SOCKSIDE]) /* Indexed by ICMP echo identifier */ static struct icmp_ping_flow *icmp_id_map[IP_VERSIONS][ICMP_NUM_IDS]; /** * icmp_sock_handler() - Handle new data from ICMP or ICMPv6 socket * @c: Execution context * @af: Address family (AF_INET or AF_INET6) * @ref: epoll reference */ void icmp_sock_handler(const struct ctx *c, int af, union epoll_ref ref) { const char *const pname = af == AF_INET ? "ICMP" : "ICMPv6"; struct icmp_ping_flow *pingf = PINGF(ref.flowside.flow); char buf[USHRT_MAX]; union { struct sockaddr sa; struct sockaddr_in sa4; struct sockaddr_in6 sa6; } sr; uint16_t id = TAPFSIDE(pingf)->eport, seq; socklen_t sl = sizeof(sr); ssize_t n; if (c->no_icmp) return; ASSERT(pingf); n = recvfrom(ref.fd, buf, sizeof(buf), 0, &sr.sa, &sl); if (n < 0) { warn("%s: recvfrom() error on ping socket: %s", pname, strerror(errno)); return; } if (sr.sa.sa_family != af) goto unexpected; if (af == AF_INET) { struct icmphdr *ih4 = (struct icmphdr *)buf; if ((size_t)n < sizeof(*ih4) || ih4->type != ICMP_ECHOREPLY) goto unexpected; /* Adjust packet back to guest-side ID */ ih4->un.echo.id = htons(id); seq = ntohs(ih4->un.echo.sequence); } else if (af == AF_INET6) { struct icmp6hdr *ih6 = (struct icmp6hdr *)buf; if ((size_t)n < sizeof(*ih6) || ih6->icmp6_type != ICMPV6_ECHO_REPLY) goto unexpected; /* Adjust packet back to guest-side ID */ ih6->icmp6_identifier = htons(id); seq = ntohs(ih6->icmp6_sequence); } else { ASSERT(0); } if (c->mode == MODE_PASTA) { if (pingf->seq == seq) return; pingf->seq = seq; } debug("%s: echo reply to tap, ID: %"PRIu16", seq: %"PRIu16, pname, id, seq); if (af == AF_INET) { const struct in_addr *saddr = inany_v4(&TAPFSIDE(pingf)->faddr); const struct in_addr *daddr = inany_v4(&TAPFSIDE(pingf)->eaddr); ASSERT(saddr && daddr); /* Must have IPv4 addresses */ tap_icmp4_send(c, *saddr, *daddr, buf, n); } else if (af == AF_INET6) { const struct in6_addr *saddr = &TAPFSIDE(pingf)->faddr.a6; const struct in6_addr *daddr = &TAPFSIDE(pingf)->eaddr.a6; tap_icmp6_send(c, saddr, daddr, buf, n); } return; unexpected: warn("%s: Unexpected packet on ping socket", pname); } /** * icmp_ping_close() - Close out and cleanup a ping flow * @c: Execution context * @pingf: ping flow entry to close */ static void icmp_ping_close(const struct ctx *c, struct icmp_ping_flow *pingf) { epoll_ctl(c->epollfd, EPOLL_CTL_DEL, pingf->sock, NULL); close(pingf->sock); flow_hash_remove(c, FLOW_SIDX(pingf, TAPSIDE)); if (pingf->f.type == FLOW_PING4) icmp_id_map[V4][pingf->id] = NULL; else icmp_id_map[V6][pingf->id] = NULL; } /** * icmp_ping_new() - Prepare a new ping socket for a new id * @c: Execution context * @id_map: id map entry of the sequence to open * @af: Address family, AF_INET or AF_INET6 * @id: ICMP id for the new sequence * @saddr: Source address * @daddr: Destination address * * Return: Newly opened ping flow, or NULL on failure */ static struct icmp_ping_flow *icmp_ping_new(const struct ctx *c, struct icmp_ping_flow **id_map, int af, uint16_t id, const void *saddr, const void *daddr) { const char *const pname = af == AF_INET ? "ICMP" : "ICMPv6"; uint8_t flowtype = af == AF_INET ? FLOW_PING4 : FLOW_PING6; union flow *flow = flow_alloc(); struct icmp_ping_flow *pingf; const void *bind_addr; const char *bind_if; union epoll_ref ref; int s; if (!flow) return NULL; if (af == AF_INET) { ref.type = EPOLL_TYPE_ICMP; bind_addr = &c->ip4.addr_out; bind_if = c->ip4.ifname_out; } else { ref.type = EPOLL_TYPE_ICMPV6; bind_addr = &c->ip6.addr_out; bind_if = c->ip6.ifname_out; } ref.flowside = FLOW_SIDX(flow, SOCKSIDE); s = sock_l4(c, af, flow_proto[flowtype], bind_addr, bind_if, 0, ref.data); if (s < 0) { warn("Cannot open \"ping\" socket. You might need to:"); warn(" sysctl -w net.ipv4.ping_group_range=\"0 2147483647\""); warn("...echo requests/replies will fail."); goto cancel; } if (s > FD_REF_MAX) goto cancel; pingf = &flow->ping; pingf->f.type = flowtype; pingf->seq = -1; pingf->sock = s; pingf->id = id; *id_map = pingf; debug("%s: new socket %i for echo ID %"PRIu16, pname, s, id); flowside_from_af(TAPFSIDE(pingf), PIF_TAP, af, daddr, id, saddr, id); FLOW_NEW_DBG(pingf, TAPSIDE); flow_hash_insert(c, FLOW_SIDX(pingf, TAPSIDE)); return pingf; cancel: if (s >= 0) close(s); flow_alloc_cancel(flow); return NULL; } /** * icmp_tap_handler() - Handle packets from tap * @c: Execution context * @pif: pif on which the packet is arriving * @af: Address family, AF_INET or AF_INET6 * @saddr: Source address * @daddr: Destination address * @p: Packet pool, single packet with ICMP/ICMPv6 header * @now: Current timestamp * * Return: count of consumed packets (always 1, even if malformed) */ int icmp_tap_handler(const struct ctx *c, uint8_t pif, int af, const void *saddr, const void *daddr, const struct pool *p, const struct timespec *now) { const char *const pname = af == AF_INET ? "ICMP" : "ICMPv6"; union { struct sockaddr sa; struct sockaddr_in sa4; struct sockaddr_in6 sa6; } sa = { .sa.sa_family = af }; const socklen_t sl = af == AF_INET ? sizeof(sa.sa4) : sizeof(sa.sa6); struct icmp_ping_flow *pingf, **id_map; union flow *flow; uint16_t id, seq; uint8_t proto; size_t plen; void *pkt; (void)saddr; (void)pif; pkt = packet_get(p, 0, 0, 0, &plen); if (!pkt) return 1; if (af == AF_INET) { struct icmphdr *ih = (struct icmphdr *)pkt; if (plen < sizeof(*ih)) return 1; if (ih->type != ICMP_ECHO) return 1; proto = IPPROTO_ICMP; id = ntohs(ih->un.echo.id); id_map = &icmp_id_map[V4][id]; seq = ntohs(ih->un.echo.sequence); sa.sa4.sin_addr = *(struct in_addr *)daddr; } else if (af == AF_INET6) { struct icmp6hdr *ih = (struct icmp6hdr *)pkt; if (plen < sizeof(*ih)) return 1; if (ih->icmp6_type != ICMPV6_ECHO_REQUEST) return 1; proto = IPPROTO_ICMPV6; id = ntohs(ih->icmp6_identifier); id_map = &icmp_id_map[V6][id]; seq = ntohs(ih->icmp6_sequence); sa.sa6.sin6_addr = *(struct in6_addr *)daddr; sa.sa6.sin6_scope_id = c->ifi6; } else { ASSERT(0); } flow = flow_at_sidx(flow_hash_lookup(c, proto, PIF_TAP, af, saddr, daddr, id, id)); if (flow) pingf = &flow->ping; else if (!(pingf = icmp_ping_new(c, id_map, af, id, saddr, daddr))) return 1; ASSERT(flow_proto[pingf->f.type] == proto); pingf->ts = now->tv_sec; if (sendto(pingf->sock, pkt, plen, MSG_NOSIGNAL, &sa.sa, sl) < 0) { debug("%s: failed to relay request to socket: %s", pname, strerror(errno)); if (flow) goto cancel; } debug("%s: echo request to socket, ID: %"PRIu16", seq: %"PRIu16, pname, id, seq); if (!flow) /* Nothing more to do for an existing flow */ return 1; /* We need to wait until after the sendto() to fill in the SOCKSIDE * information, so that we can find out the host side id the kernel * assigned. If there's no bind address specified, this will still have * 0.0.0.0 or :: as the host side forwarding address. There's not * really anything we can do to fill that in, which means we can never * insert the SOCKSIDE of a ping flow into the hash table. */ if (flowside_from_sock(SOCKFSIDE(pingf), PIF_HOST, pingf->sock, NULL, &sa) < 0) { err("%s: Failed to get local name for outgoing ping socket", pname); goto cancel; } /* We want the id as the "port" on both sides */ SOCKFSIDE(pingf)->eport = SOCKFSIDE(pingf)->fport; FLOW_FWD_DBG(pingf, SOCKSIDE); return 1; cancel: /* Something went wrong, back out creation of the flow */ icmp_ping_close(c, pingf); flow_alloc_cancel(flow); return 1; } /** * icmp_ping_timer() - Handler for timed events related to a given flow * @c: Execution context * @flow: flow table entry to check for timeout * @now: Current timestamp * * Return: true if the flow is ready to free, false otherwise */ bool icmp_ping_timer(const struct ctx *c, union flow *flow, const struct timespec *now) { struct icmp_ping_flow *pingf = &flow->ping; if (now->tv_sec - pingf->ts <= ICMP_ECHO_TIMEOUT) return false; icmp_ping_close(c, pingf); return true; }