From mboxrd@z Thu Jan 1 00:00:00 1970 Received: from mail.ozlabs.org (mail.ozlabs.org [IPv6:2404:9400:2221:ea00::3]) by passt.top (Postfix) with ESMTPS id 380335A026F for ; Fri, 14 Jun 2024 08:14:02 +0200 (CEST) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gibson.dropbear.id.au; s=202312; t=1718345632; bh=X7tuQn9TyBmAyk4GZ/4ncVV5YRHsXOttOJzi3GeoqMo=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=BwFieQ8PEVq5xTJeqTSDrwaUctxRaMOdn+vjhLO6EAA0b/RrLMYwp6QbHaLkJ3H97 MmZMMkpF+FZ75T+XNWJoUiv2X4N5HzO/ofnoDepKnCqX4M7lizwyqZ8g1JMuQQBUo5 dNIP9xN+RIgytxqkxk+THPMNJqL1fmj/KaST0AwR9gMuSWDQly4D6nojtpDTGkz3Zj 8PbjUqw38bT8/9DIsovCyxQr/W5HqrKoGSfIs3QtgSh10aYuFeJprLdpn+l5mfrexS 5cKgXB84vBxxmQ+hO2d3D5VMqpGLDQ3lOXrayfejwWBri0ijK1cGuj9bNHoOETHh/J wUW7ZztzNOJlw== Received: by gandalf.ozlabs.org (Postfix, from userid 1007) id 4W0prJ3Gm5z4x1T; Fri, 14 Jun 2024 16:13:52 +1000 (AEST) From: David Gibson To: Stefano Brivio , passt-dev@passt.top Subject: [PATCH v6 20/26] udp: Direct traffic from tap according to flow table Date: Fri, 14 Jun 2024 16:13:42 +1000 Message-ID: <20240614061348.3814736-21-david@gibson.dropbear.id.au> X-Mailer: git-send-email 2.45.2 In-Reply-To: <20240614061348.3814736-1-david@gibson.dropbear.id.au> References: <20240614061348.3814736-1-david@gibson.dropbear.id.au> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit Message-ID-Hash: MBBWM2LPF4AFS6ZKGWGCADRKLXFDZQYI X-Message-ID-Hash: MBBWM2LPF4AFS6ZKGWGCADRKLXFDZQYI X-MailFrom: dgibson@gandalf.ozlabs.org X-Mailman-Rule-Misses: dmarc-mitigation; no-senders; approved; emergency; loop; banned-address; member-moderation; nonmember-moderation; administrivia; implicit-dest; max-recipients; max-size; news-moderation; no-subject; digests; suspicious-header CC: jmaloy@redhat.com, David Gibson X-Mailman-Version: 3.3.8 Precedence: list List-Id: Development discussion and patches for passt Archived-At: Archived-At: List-Archive: List-Archive: List-Help: List-Owner: List-Post: List-Subscribe: List-Unsubscribe: 
Although we construct flow entries for UDP packets, we don't yet actually direct traffic according to the information in there. Start fixing that by directing traffic originating from the tap device according to the flow table. Signed-off-by: David Gibson --- udp.c | 153 +++++++++++++++++++++------------------------------------- udp.h | 4 +- 2 files changed, 58 insertions(+), 99 deletions(-) diff --git a/udp.c b/udp.c index cb6db5c5..4668690e 100644 --- a/udp.c +++ b/udp.c @@ -52,6 +52,27 @@ * scan flows and those which are older than UDP_CONN_TIMEOUT (180s) are * removed. * + * - Locating or creating an outgoing socket + * + * When forwarding to a socket based interface, we need to find a suitable + * socket to send via. Generally this should have a bound address and port + * matching the forwarding address and port of the flowside for the outgoing + * datagram. However, if we have an existing socket with a matching port and + * an "any" address, we need to use that (in that case a socket with a + * specific bound address would conflict). + * + * FIXME: currently we don't perform this lookup correctly. Instead we abuse + * the fact that it's rare to have multiple flows with the same forwarding + * address but different forwarding port. We store at most a single socket + * per bound port number (and IP version). For datagrams forwarded from + * PIF_TAP to PIF_HOST these are in udp_tap_map[]. + * + * For ports where port forwarding is configured (-u option) a socket is + * opened during start up, bound to the specified forwarding address and + * stored in udp_tap_map[]. For other ports we open a socket when we first + * need to forward a datagram from that port, bound to the configured outbound + * address (which may be "any"). 
+ * * Port Tracking * ============= * @@ -149,6 +170,7 @@ #include #include #include +#include #include "checksum.h" #include "util.h" @@ -1025,20 +1047,21 @@ cancel: * * #syscalls sendmmsg */ -int udp_tap_handler(struct ctx *c, uint8_t pif, +int udp_tap_handler(const struct ctx *c, uint8_t pif, sa_family_t af, const void *saddr, const void *daddr, const struct pool *p, int idx, const struct timespec *now) { + const struct flowside *toside; struct mmsghdr mm[UIO_MAXIOV]; struct iovec m[UIO_MAXIOV]; - struct sockaddr_in6 s_in6; - struct sockaddr_in s_in; + union udp_epoll_ref uref; + union sockaddr_inany sa; const struct udphdr *uh; struct udp_flow *uflow; - struct sockaddr *sa; int i, s, count = 0; - in_port_t src, dst; flow_sidx_t sidx; + in_port_t src; + uint8_t topif; socklen_t sl; uh = packet_get(p, idx, 0, sizeof(*uh), NULL); @@ -1048,59 +1071,36 @@ int udp_tap_handler(struct ctx *c, uint8_t pif, /* The caller already checks that all the messages have the same source * and destination, so we can just take those from the first message. 
*/ - src = ntohs(uh->source); - dst = ntohs(uh->dest); - sidx = udp_flow_from_tap(c, pif, af, saddr, daddr, src, dst); - if ((uflow = udp_at_sidx(sidx))) - uflow->ts = now->tv_sec; - else - debug("UDP from tap without flow"); + sidx = udp_flow_from_tap(c, pif, af, saddr, daddr, + ntohs(uh->source), ntohs(uh->dest)); + if (!(uflow = udp_at_sidx(sidx))) { + char sstr[INANY_ADDRSTRLEN], dstr[INANY_ADDRSTRLEN]; - src += c->udp.fwd_in.rdelta[src]; + debug("Dropping UDP packet without flow %s %s:%hu -> %s:%hu", + pif_name(pif), + inet_ntop(af, saddr, sstr, sizeof(sstr)), + ntohs(uh->source), + inet_ntop(af, daddr, dstr, sizeof(dstr)), + ntohs(uh->dest)); + return 1; + } - if (af == AF_INET) { - s_in = (struct sockaddr_in) { - .sin_family = AF_INET, - .sin_port = uh->dest, - .sin_addr = *(struct in_addr *)daddr, - }; + topif = uflow->f.pif[sidx.side]; + toside = &uflow->f.side[sidx.side]; - sa = (struct sockaddr *)&s_in; - sl = sizeof(s_in); + ASSERT(topif == PIF_HOST); - if (IN4_ARE_ADDR_EQUAL(&s_in.sin_addr, &c->ip4.dns_match) && - ntohs(s_in.sin_port) == 53) { - s_in.sin_addr = c->ip4.dns_host; - udp_tap_map[V4][src].ts = now->tv_sec; - udp_tap_map[V4][src].flags |= PORT_DNS_FWD; - bitmap_set(udp_act[V4][UDP_ACT_TAP], src); - } else if (IN4_ARE_ADDR_EQUAL(&s_in.sin_addr, &c->ip4.gw) && - !c->no_map_gw) { - if (!(udp_tap_map[V4][dst].flags & PORT_LOCAL) || - (udp_tap_map[V4][dst].flags & PORT_LOOPBACK)) - s_in.sin_addr.s_addr = htonl(INADDR_LOOPBACK); - else - s_in.sin_addr = c->ip4.addr_seen; - } + uflow->ts = now->tv_sec; - debug("UDP from tap src=%hu dst=%hu, s=%d", - src, dst, udp_tap_map[V4][src].sock); - if ((s = udp_tap_map[V4][src].sock) < 0) { - struct in_addr bind_addr = IN4ADDR_ANY_INIT; - union udp_epoll_ref uref = { - .port = src, - .pif = PIF_HOST, - }; - const char *bind_if = NULL; - - if (!IN4_IS_ADDR_LOOPBACK(&s_in.sin_addr)) - bind_if = c->ip4.ifname_out; + sockaddr_from_inany(&sa, &sl, &toside->eaddr, toside->eport, c->ifi6); + src = toside->fport; + 
uref.port = src; + uref.pif = topif; - if (!IN4_IS_ADDR_LOOPBACK(&s_in.sin_addr)) - bind_addr = c->ip4.addr_out; - - s = sock_l4(c, AF_INET, IPPROTO_UDP, &bind_addr, - bind_if, src, uref.u32); + if (sa.sa_family == AF_INET) { + if ((s = udp_tap_map[V4][src].sock) < 0) { + s = flowside_sock_l4(c, IPPROTO_UDP, topif, toside, + uref.u32); if (s < 0) return p->count - idx; @@ -1110,52 +1110,11 @@ int udp_tap_handler(struct ctx *c, uint8_t pif, udp_tap_map[V4][src].ts = now->tv_sec; } else { - s_in6 = (struct sockaddr_in6) { - .sin6_family = AF_INET6, - .sin6_port = uh->dest, - .sin6_addr = *(struct in6_addr *)daddr, - }; - const struct in6_addr *bind_addr = &in6addr_any; - - sa = (struct sockaddr *)&s_in6; - sl = sizeof(s_in6); - - if (IN6_ARE_ADDR_EQUAL(daddr, &c->ip6.dns_match) && - ntohs(s_in6.sin6_port) == 53) { - s_in6.sin6_addr = c->ip6.dns_host; - udp_tap_map[V6][src].ts = now->tv_sec; - udp_tap_map[V6][src].flags |= PORT_DNS_FWD; - bitmap_set(udp_act[V6][UDP_ACT_TAP], src); - } else if (IN6_ARE_ADDR_EQUAL(daddr, &c->ip6.gw) && - !c->no_map_gw) { - if (!(udp_tap_map[V6][dst].flags & PORT_LOCAL) || - (udp_tap_map[V6][dst].flags & PORT_LOOPBACK)) - s_in6.sin6_addr = in6addr_loopback; - else if (udp_tap_map[V6][dst].flags & PORT_GUA) - s_in6.sin6_addr = c->ip6.addr; - else - s_in6.sin6_addr = c->ip6.addr_seen; - } else if (IN6_IS_ADDR_LINKLOCAL(&s_in6.sin6_addr)) { - bind_addr = &c->ip6.addr_ll; - } - if ((s = udp_tap_map[V6][src].sock) < 0) { - union udp_epoll_ref uref = { - .v6 = 1, - .port = src, - .pif = PIF_HOST, - }; - const char *bind_if = NULL; - - if (!IN6_IS_ADDR_LOOPBACK(&s_in6.sin6_addr)) - bind_if = c->ip6.ifname_out; - - if (!IN6_IS_ADDR_LOOPBACK(&s_in6.sin6_addr) && - !IN6_IS_ADDR_LINKLOCAL(&s_in6.sin6_addr)) - bind_addr = &c->ip6.addr_out; + uref.v6 = 1; - s = sock_l4(c, AF_INET6, IPPROTO_UDP, bind_addr, - bind_if, src, uref.u32); + s = flowside_sock_l4(c, IPPROTO_UDP, topif, toside, + uref.u32); if (s < 0) return p->count - idx; @@ -1174,7 +1133,7 
@@ int udp_tap_handler(struct ctx *c, uint8_t pif, if (!uh_send) return p->count - idx; - mm[i].msg_hdr.msg_name = sa; + mm[i].msg_hdr.msg_name = &sa; mm[i].msg_hdr.msg_namelen = sl; if (len) { diff --git a/udp.h b/udp.h index 5865def2..d25e66cb 100644 --- a/udp.h +++ b/udp.h @@ -11,8 +11,8 @@ void udp_portmap_clear(void); void udp_buf_sock_handler(const struct ctx *c, union epoll_ref ref, uint32_t events, const struct timespec *now); -int udp_tap_handler(struct ctx *c, uint8_t pif, sa_family_t af, - const void *saddr, const void *daddr, +int udp_tap_handler(const struct ctx *c, uint8_t pif, + sa_family_t af, const void *saddr, const void *daddr, const struct pool *p, int idx, const struct timespec *now); int udp_sock_init(const struct ctx *c, int ns, sa_family_t af, const void *addr, const char *ifname, in_port_t port); -- 2.45.2