From mboxrd@z Thu Jan 1 00:00:00 1970 Received: from mail.ozlabs.org (mail.ozlabs.org [IPv6:2404:9400:2221:ea00::3]) by passt.top (Postfix) with ESMTPS id 904B15A031D for ; Fri, 05 Jul 2024 04:07:43 +0200 (CEST) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gibson.dropbear.id.au; s=202312; t=1720145251; bh=xPDuJ9TF8CZiU6AiKzOhntbjmxk7bk9I87PnHdQw9Ws=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=lj7GM0KvvOGkIXTEgFlP5M0i7X10IihsGOTRj6IZskATF0d3dBJNI2sxVK12/bpA5 rvG2UyNXxmd4SBUy1Tlsp2aq8GVhMXeTsTWb2awC6zAF84KAODmDQtqi/1HbVYJHig i820DHgwWwxEmvllamR+D7rovWJLxF3WVdZSDYP9bc5kX8ZRW8OGIrBR6Y3+g6QCsA ZyV2UxTS289NutStsJ9nTVobO+UerUBnSJ/nrLKCRlPEEP3nu77XZ5RUh7ev+FNQKo yoPBNoIyfVCvCU5tnwv1eSnbkEJn625lGSCN31YHvm07I6nrjhcBZDbM30Z2xS2ZHz AqrtKEvVwIg9w== Received: by gandalf.ozlabs.org (Postfix, from userid 1007) id 4WFcNM2K2rz4xQP; Fri, 5 Jul 2024 12:07:31 +1000 (AEST) From: David Gibson To: Stefano Brivio , passt-dev@passt.top Subject: [PATCH v7 23/27] udp: Find or create flows for datagrams from tap interface Date: Fri, 5 Jul 2024 12:07:20 +1000 Message-ID: <20240705020724.3447719-24-david@gibson.dropbear.id.au> X-Mailer: git-send-email 2.45.2 In-Reply-To: <20240705020724.3447719-1-david@gibson.dropbear.id.au> References: <20240705020724.3447719-1-david@gibson.dropbear.id.au> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit Message-ID-Hash: 5Z54VCX7DJ6OS2O4CVK5IJQDSTXW42V4 X-Message-ID-Hash: 5Z54VCX7DJ6OS2O4CVK5IJQDSTXW42V4 X-MailFrom: dgibson@gandalf.ozlabs.org X-Mailman-Rule-Misses: dmarc-mitigation; no-senders; approved; emergency; loop; banned-address; member-moderation; nonmember-moderation; administrivia; implicit-dest; max-recipients; max-size; news-moderation; no-subject; digests; suspicious-header CC: jmaloy@redhat.com, David Gibson X-Mailman-Version: 3.3.8 Precedence: list List-Id: Development discussion and patches for passt Archived-At: Archived-At: List-Archive: List-Archive: List-Help: List-Owner: List-Post: List-Subscribe: List-Unsubscribe: 
Currently we create flows for datagrams from socket interfaces, and use them to direct "spliced" (socket to socket) datagrams. We don't yet match datagrams from the tap interface to existing flows, nor create new flows for them. Add that functionality, matching datagrams from tap to existing flows when they exist, or creating new ones. As with spliced flows, when creating a new flow from tap to socket, we create a new connected socket to receive reply datagrams attached to that flow specifically. We extend udp_reply_sock_handler() to handle reply packets bound for tap rather than another socket. For non-obvious reasons, this caused a failure for me when running under valgrind, because valgrind invoked rt_sigreturn which is not in our seccomp filter. Since we already allow rt_sigaction and others for the valgrind target, it seems reasonable to add rt_sigreturn as well. Signed-off-by: David Gibson --- Makefile | 2 +- udp.c | 211 +++++++++++++++++++++++++------------------------------ udp.h | 4 +- 3 files changed, 99 insertions(+), 118 deletions(-) diff --git a/Makefile b/Makefile index 92cbd5a6..bd504d23 100644 --- a/Makefile +++ b/Makefile @@ -128,7 +128,7 @@ qrap: $(QRAP_SRCS) passt.h $(CC) $(FLAGS) $(CFLAGS) $(CPPFLAGS) $(QRAP_SRCS) -o qrap $(LDFLAGS) valgrind: EXTRA_SYSCALLS += rt_sigprocmask rt_sigtimedwait rt_sigaction \ - getpid gettid kill clock_gettime mmap \ + rt_sigreturn getpid gettid kill clock_gettime mmap \ munmap open unlink gettimeofday futex valgrind: FLAGS += -g -DVALGRIND valgrind: all diff --git a/udp.c b/udp.c index a4eb6d0f..a26ffe0c 100644 --- a/udp.c +++ b/udp.c @@ -103,6 +103,7 @@ #include #include #include +#include #include "checksum.h" #include "util.h" @@ -373,6 +374,8 @@ static void udp_flow_close(const struct ctx *c, const struct udp_flow *uflow) close(uflow->s[TGTSIDE]); } flow_hash_remove(c, FLOW_SIDX(uflow, INISIDE)); + if (!pif_is_socket(uflow->f.pif[TGTSIDE])) + flow_hash_remove(c, FLOW_SIDX(uflow, TGTSIDE)); } /** @@ -455,6 +458,13 @@ 
static flow_sidx_t udp_flow_new(const struct ctx *c, union flow *flow, } flow_hash_insert(c, FLOW_SIDX(uflow, INISIDE)); + + /* If the target side is a socket, it will be a reply socket that knows + * its own flowside. But if it's tap, then we need to look it up by + * hash. + */ + if (!pif_is_socket(tgtpif)) + flow_hash_insert(c, FLOW_SIDX(uflow, TGTSIDE)); FLOW_ACTIVATE(uflow); return FLOW_SIDX(uflow, TGTSIDE); @@ -817,6 +827,8 @@ void udp_reply_sock_handler(const struct ctx *c, union epoll_ref ref, flow_sidx_t tosidx = flow_sidx_opposite(ref.flowside); struct udp_flow *uflow = udp_at_sidx(ref.flowside); const struct flowside *fromside = &uflow->f.side[ref.flowside.side]; + const struct flowside *toside = &uflow->f.side[tosidx.side]; + uint8_t topif = uflow->f.pif[tosidx.side]; bool v6 = !inany_v4(&fromside->eaddr); struct mmsghdr *mmh_recv = v6 ? udp6_mh_recv : udp4_mh_recv; int from_s = uflow->s[ref.flowside.side]; @@ -830,10 +842,64 @@ void udp_reply_sock_handler(const struct ctx *c, union epoll_ref ref, flow_trace(uflow, "Received %d datagrams on reply socket", n); uflow->ts = now->tv_sec; - for (i = 0; i < n; i++) - udp_splice_prepare(mmh_recv, i); + for (i = 0; i < n; i++) { + if (pif_is_socket(topif)) + udp_splice_prepare(mmh_recv, i); + else + udp_tap_prepare(c, mmh_recv, i, toside->eport, v6, now); + } - udp_splice_send(c, 0, n, tosidx); + if (pif_is_socket(topif)) + udp_splice_send(c, 0, n, tosidx); + else + tap_send_frames(c, &udp_l2_iov[0][0], UDP_NUM_IOVS, n); +} + +/** + * udp_flow_from_tap() - Find or create UDP flow for tap packets + * @c: Execution context + * @pif: pif on which the packet is arriving + * @af: Address family, AF_INET or AF_INET6 + * @saddr: Source address on guest side + * @daddr: Destination address on guest side + * @srcport: Source port on guest side + * @dstport: Destination port on guest side + * + * Return: sidx for the destination side of the flow for this packet, or + * FLOW_SIDX_NONE if we couldn't find or create a flow. 
+ */ +static flow_sidx_t udp_flow_from_tap(const struct ctx *c, + uint8_t pif, sa_family_t af, + const void *saddr, const void *daddr, + in_port_t srcport, in_port_t dstport, + const struct timespec *now) +{ + struct udp_flow *uflow; + union flow *flow; + flow_sidx_t sidx; + + ASSERT(pif == PIF_TAP); + + sidx = flow_lookup_af(c, IPPROTO_UDP, pif, af, saddr, daddr, + srcport, dstport); + if ((uflow = udp_at_sidx(sidx))) { + uflow->ts = now->tv_sec; + return flow_sidx_opposite(sidx); + } + + if (!(flow = flow_alloc())) { + char sstr[INET6_ADDRSTRLEN], dstr[INET6_ADDRSTRLEN]; + + debug("Couldn't allocate flow for UDP datagram from %s %s:%hu -> %s:%hu", + pif_name(pif), + inet_ntop(af, saddr, sstr, sizeof(sstr)), srcport, + inet_ntop(af, daddr, dstr, sizeof(dstr)), dstport); + return FLOW_SIDX_NONE; + } + + flow_initiate_af(flow, PIF_TAP, af, saddr, srcport, daddr, dstport); + + return udp_flow_new(c, flow, -1, now); } /** @@ -851,24 +917,22 @@ void udp_reply_sock_handler(const struct ctx *c, union epoll_ref ref, * * #syscalls sendmmsg */ -int udp_tap_handler(struct ctx *c, uint8_t pif, +int udp_tap_handler(const struct ctx *c, uint8_t pif, sa_family_t af, const void *saddr, const void *daddr, const struct pool *p, int idx, const struct timespec *now) { + const struct flowside *toside; struct mmsghdr mm[UIO_MAXIOV]; + union sockaddr_inany to_sa; struct iovec m[UIO_MAXIOV]; - struct sockaddr_in6 s_in6; - struct sockaddr_in s_in; const struct udphdr *uh; - struct sockaddr *sa; + struct udp_flow *uflow; int i, s, count = 0; + flow_sidx_t tosidx; in_port_t src, dst; + uint8_t topif; socklen_t sl; - (void)c; - (void)saddr; - (void)pif; - uh = packet_get(p, idx, 0, sizeof(*uh), NULL); if (!uh) return 1; @@ -877,116 +941,33 @@ int udp_tap_handler(struct ctx *c, uint8_t pif, * and destination, so we can just take those from the first message. 
*/ src = ntohs(uh->source); - src += c->udp.fwd_in.rdelta[src]; dst = ntohs(uh->dest); - if (af == AF_INET) { - s_in = (struct sockaddr_in) { - .sin_family = AF_INET, - .sin_port = uh->dest, - .sin_addr = *(struct in_addr *)daddr, - }; + tosidx = udp_flow_from_tap(c, pif, af, saddr, daddr, src, dst, now); + if (!(uflow = udp_at_sidx(tosidx))) { + char sstr[INET6_ADDRSTRLEN], dstr[INET6_ADDRSTRLEN]; - sa = (struct sockaddr *)&s_in; - sl = sizeof(s_in); - - if (IN4_ARE_ADDR_EQUAL(&s_in.sin_addr, &c->ip4.dns_match) && - ntohs(s_in.sin_port) == 53) { - s_in.sin_addr = c->ip4.dns_host; - udp_tap_map[V4][src].ts = now->tv_sec; - udp_tap_map[V4][src].flags |= PORT_DNS_FWD; - bitmap_set(udp_act[V4][UDP_ACT_TAP], src); - } else if (IN4_ARE_ADDR_EQUAL(&s_in.sin_addr, &c->ip4.gw) && - !c->no_map_gw) { - if (!(udp_tap_map[V4][dst].flags & PORT_LOCAL) || - (udp_tap_map[V4][dst].flags & PORT_LOOPBACK)) - s_in.sin_addr.s_addr = htonl(INADDR_LOOPBACK); - else - s_in.sin_addr = c->ip4.addr_seen; - } - - debug("UDP from tap src=%hu dst=%hu, s=%d", - src, dst, udp_tap_map[V4][src].sock); - if ((s = udp_tap_map[V4][src].sock) < 0) { - struct in_addr bind_addr = IN4ADDR_ANY_INIT; - union udp_epoll_ref uref = { - .port = src, - .pif = PIF_HOST, - }; - const char *bind_if = NULL; - - if (!IN4_IS_ADDR_LOOPBACK(&s_in.sin_addr)) - bind_if = c->ip4.ifname_out; - - if (!IN4_IS_ADDR_LOOPBACK(&s_in.sin_addr)) - bind_addr = c->ip4.addr_out; - - s = sock_l4(c, AF_INET, EPOLL_TYPE_UDP, &bind_addr, - bind_if, src, uref.u32); - if (s < 0) - return p->count - idx; - - udp_tap_map[V4][src].sock = s; - bitmap_set(udp_act[V4][UDP_ACT_TAP], src); - } - - udp_tap_map[V4][src].ts = now->tv_sec; - } else { - s_in6 = (struct sockaddr_in6) { - .sin6_family = AF_INET6, - .sin6_port = uh->dest, - .sin6_addr = *(struct in6_addr *)daddr, - }; - const struct in6_addr *bind_addr = &in6addr_any; - - sa = (struct sockaddr *)&s_in6; - sl = sizeof(s_in6); - - if (IN6_ARE_ADDR_EQUAL(daddr, &c->ip6.dns_match) && - 
ntohs(s_in6.sin6_port) == 53) { - s_in6.sin6_addr = c->ip6.dns_host; - udp_tap_map[V6][src].ts = now->tv_sec; - udp_tap_map[V6][src].flags |= PORT_DNS_FWD; - bitmap_set(udp_act[V6][UDP_ACT_TAP], src); - } else if (IN6_ARE_ADDR_EQUAL(daddr, &c->ip6.gw) && - !c->no_map_gw) { - if (!(udp_tap_map[V6][dst].flags & PORT_LOCAL) || - (udp_tap_map[V6][dst].flags & PORT_LOOPBACK)) - s_in6.sin6_addr = in6addr_loopback; - else if (udp_tap_map[V6][dst].flags & PORT_GUA) - s_in6.sin6_addr = c->ip6.addr; - else - s_in6.sin6_addr = c->ip6.addr_seen; - } else if (IN6_IS_ADDR_LINKLOCAL(&s_in6.sin6_addr)) { - bind_addr = &c->ip6.addr_ll; - } - - if ((s = udp_tap_map[V6][src].sock) < 0) { - union udp_epoll_ref uref = { - .v6 = 1, - .port = src, - .pif = PIF_HOST, - }; - const char *bind_if = NULL; - - if (!IN6_IS_ADDR_LOOPBACK(&s_in6.sin6_addr)) - bind_if = c->ip6.ifname_out; + debug("Dropping datagram with no flow %s %s:%hu -> %s:%hu", + pif_name(pif), + inet_ntop(af, saddr, sstr, sizeof(sstr)), src, + inet_ntop(af, daddr, dstr, sizeof(dstr)), dst); + return 1; + } - if (!IN6_IS_ADDR_LOOPBACK(&s_in6.sin6_addr) && - !IN6_IS_ADDR_LINKLOCAL(&s_in6.sin6_addr)) - bind_addr = &c->ip6.addr_out; + topif = uflow->f.pif[tosidx.side]; + if (topif != PIF_HOST) { + uint8_t frompif = uflow->f.pif[!tosidx.side]; - s = sock_l4(c, AF_INET6, EPOLL_TYPE_UDP, bind_addr, - bind_if, src, uref.u32); - if (s < 0) - return p->count - idx; + flow_err(uflow, "No support for forwarding UDP from %s to %s", + pif_name(frompif), pif_name(topif)); + return 1; + } + toside = &uflow->f.side[tosidx.side]; - udp_tap_map[V6][src].sock = s; - bitmap_set(udp_act[V6][UDP_ACT_TAP], src); - } + s = udp_at_sidx(tosidx)->s[tosidx.side]; + ASSERT(s >= 0); - udp_tap_map[V6][src].ts = now->tv_sec; - } + pif_sockaddr(c, &to_sa, &sl, topif, &toside->eaddr, toside->eport); for (i = 0; i < (int)p->count - idx; i++) { struct udphdr *uh_send; @@ -996,7 +977,7 @@ int udp_tap_handler(struct ctx *c, uint8_t pif, if (!uh_send) return 
p->count - idx; - mm[i].msg_hdr.msg_name = sa; + mm[i].msg_hdr.msg_name = &to_sa; mm[i].msg_hdr.msg_namelen = sl; if (len) { diff --git a/udp.h b/udp.h index 310f42fd..88417fb0 100644 --- a/udp.h +++ b/udp.h @@ -13,8 +13,8 @@ void udp_buf_sock_handler(const struct ctx *c, union epoll_ref ref, uint32_t events, const struct timespec *now); void udp_reply_sock_handler(const struct ctx *c, union epoll_ref ref, uint32_t events, const struct timespec *now); -int udp_tap_handler(struct ctx *c, uint8_t pif, sa_family_t af, - const void *saddr, const void *daddr, +int udp_tap_handler(const struct ctx *c, uint8_t pif, + sa_family_t af, const void *saddr, const void *daddr, const struct pool *p, int idx, const struct timespec *now); int udp_sock_init(const struct ctx *c, int ns, sa_family_t af, const void *addr, const char *ifname, in_port_t port); -- 2.45.2