From: David Gibson <david@gibson.dropbear.id.au>
To: Stefano Brivio <sbrivio@redhat.com>, passt-dev@passt.top
Cc: jmaloy@redhat.com, David Gibson <david@gibson.dropbear.id.au>
Subject: [PATCH v6 20/26] udp: Direct traffic from tap according to flow table
Date: Fri, 14 Jun 2024 16:13:42 +1000 [thread overview]
Message-ID: <20240614061348.3814736-21-david@gibson.dropbear.id.au> (raw)
In-Reply-To: <20240614061348.3814736-1-david@gibson.dropbear.id.au>
Although we construct flow entries for UDP packets, we don't yet actually
direct traffic according to the information in there. Start fixing that
by directing traffic originating from the tap device according to the flow
table.
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
---
udp.c | 153 +++++++++++++++++++++-------------------------------------
udp.h | 4 +-
2 files changed, 58 insertions(+), 99 deletions(-)
diff --git a/udp.c b/udp.c
index cb6db5c5..4668690e 100644
--- a/udp.c
+++ b/udp.c
@@ -52,6 +52,27 @@
* scan flows and those which are older than UDP_CONN_TIMEOUT (180s) are
* removed.
*
+ * - Locating or creating an outgoing socket
+ *
+ * When forwarding to a socket-based interface, we need to find a suitable
+ * socket to send via. Generally this should have a bound address and port
+ * matching the forwarding address and port of the flowside for the outgoing
+ * datagram. However, if we have an existing socket with a matching port and
+ * an "any" address, we need to use that (in that case a socket with a
+ * specific bound address would conflict).
+ *
+ * FIXME: currently we don't perform this lookup correctly. Instead we abuse
+ * the fact that it's rare to have multiple flows with the same forwarding
+ * address but different forwarding port. We store at most a single socket
+ * per bound port number (and IP version). For datagrams forwarded from
+ * PIF_TAP to PIF_HOST these are in udp_tap_map[].
+ *
+ * For ports where port forwarding is configured (-u option) a socket is
+ * opened during start up, bound to the specified forwarding address and
+ * stored in udp_tap_map[]. For other ports we open a socket when we first
+ * need to forward a datagram from that port, bound to the configured outbound
+ * address (which may be "any").
+ *
* Port Tracking
* =============
*
@@ -149,6 +170,7 @@
#include <sys/socket.h>
#include <sys/uio.h>
#include <time.h>
+#include <arpa/inet.h>
#include "checksum.h"
#include "util.h"
@@ -1025,20 +1047,21 @@ cancel:
*
* #syscalls sendmmsg
*/
-int udp_tap_handler(struct ctx *c, uint8_t pif,
+int udp_tap_handler(const struct ctx *c, uint8_t pif,
sa_family_t af, const void *saddr, const void *daddr,
const struct pool *p, int idx, const struct timespec *now)
{
+ const struct flowside *toside;
struct mmsghdr mm[UIO_MAXIOV];
struct iovec m[UIO_MAXIOV];
- struct sockaddr_in6 s_in6;
- struct sockaddr_in s_in;
+ union udp_epoll_ref uref;
+ union sockaddr_inany sa;
const struct udphdr *uh;
struct udp_flow *uflow;
- struct sockaddr *sa;
int i, s, count = 0;
- in_port_t src, dst;
flow_sidx_t sidx;
+ in_port_t src;
+ uint8_t topif;
socklen_t sl;
uh = packet_get(p, idx, 0, sizeof(*uh), NULL);
@@ -1048,59 +1071,36 @@ int udp_tap_handler(struct ctx *c, uint8_t pif,
/* The caller already checks that all the messages have the same source
* and destination, so we can just take those from the first message.
*/
- src = ntohs(uh->source);
- dst = ntohs(uh->dest);
- sidx = udp_flow_from_tap(c, pif, af, saddr, daddr, src, dst);
- if ((uflow = udp_at_sidx(sidx)))
- uflow->ts = now->tv_sec;
- else
- debug("UDP from tap without flow");
+ sidx = udp_flow_from_tap(c, pif, af, saddr, daddr,
+ ntohs(uh->source), ntohs(uh->dest));
+ if (!(uflow = udp_at_sidx(sidx))) {
+ char sstr[INANY_ADDRSTRLEN], dstr[INANY_ADDRSTRLEN];
- src += c->udp.fwd_in.rdelta[src];
+ debug("Dropping UDP packet without flow %s %s:%hu -> %s:%hu",
+ pif_name(pif),
+ inet_ntop(af, saddr, sstr, sizeof(sstr)),
+ ntohs(uh->source),
+ inet_ntop(af, daddr, dstr, sizeof(dstr)),
+ ntohs(uh->dest));
+ return 1;
+ }
- if (af == AF_INET) {
- s_in = (struct sockaddr_in) {
- .sin_family = AF_INET,
- .sin_port = uh->dest,
- .sin_addr = *(struct in_addr *)daddr,
- };
+ topif = uflow->f.pif[sidx.side];
+ toside = &uflow->f.side[sidx.side];
- sa = (struct sockaddr *)&s_in;
- sl = sizeof(s_in);
+ ASSERT(topif == PIF_HOST);
- if (IN4_ARE_ADDR_EQUAL(&s_in.sin_addr, &c->ip4.dns_match) &&
- ntohs(s_in.sin_port) == 53) {
- s_in.sin_addr = c->ip4.dns_host;
- udp_tap_map[V4][src].ts = now->tv_sec;
- udp_tap_map[V4][src].flags |= PORT_DNS_FWD;
- bitmap_set(udp_act[V4][UDP_ACT_TAP], src);
- } else if (IN4_ARE_ADDR_EQUAL(&s_in.sin_addr, &c->ip4.gw) &&
- !c->no_map_gw) {
- if (!(udp_tap_map[V4][dst].flags & PORT_LOCAL) ||
- (udp_tap_map[V4][dst].flags & PORT_LOOPBACK))
- s_in.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
- else
- s_in.sin_addr = c->ip4.addr_seen;
- }
+ uflow->ts = now->tv_sec;
- debug("UDP from tap src=%hu dst=%hu, s=%d",
- src, dst, udp_tap_map[V4][src].sock);
- if ((s = udp_tap_map[V4][src].sock) < 0) {
- struct in_addr bind_addr = IN4ADDR_ANY_INIT;
- union udp_epoll_ref uref = {
- .port = src,
- .pif = PIF_HOST,
- };
- const char *bind_if = NULL;
-
- if (!IN4_IS_ADDR_LOOPBACK(&s_in.sin_addr))
- bind_if = c->ip4.ifname_out;
+ sockaddr_from_inany(&sa, &sl, &toside->eaddr, toside->eport, c->ifi6);
+ src = toside->fport;
+ uref.port = src;
+ uref.pif = topif;
- if (!IN4_IS_ADDR_LOOPBACK(&s_in.sin_addr))
- bind_addr = c->ip4.addr_out;
-
- s = sock_l4(c, AF_INET, IPPROTO_UDP, &bind_addr,
- bind_if, src, uref.u32);
+ if (sa.sa_family == AF_INET) {
+ if ((s = udp_tap_map[V4][src].sock) < 0) {
+ s = flowside_sock_l4(c, IPPROTO_UDP, topif, toside,
+ uref.u32);
if (s < 0)
return p->count - idx;
@@ -1110,52 +1110,11 @@ int udp_tap_handler(struct ctx *c, uint8_t pif,
udp_tap_map[V4][src].ts = now->tv_sec;
} else {
- s_in6 = (struct sockaddr_in6) {
- .sin6_family = AF_INET6,
- .sin6_port = uh->dest,
- .sin6_addr = *(struct in6_addr *)daddr,
- };
- const struct in6_addr *bind_addr = &in6addr_any;
-
- sa = (struct sockaddr *)&s_in6;
- sl = sizeof(s_in6);
-
- if (IN6_ARE_ADDR_EQUAL(daddr, &c->ip6.dns_match) &&
- ntohs(s_in6.sin6_port) == 53) {
- s_in6.sin6_addr = c->ip6.dns_host;
- udp_tap_map[V6][src].ts = now->tv_sec;
- udp_tap_map[V6][src].flags |= PORT_DNS_FWD;
- bitmap_set(udp_act[V6][UDP_ACT_TAP], src);
- } else if (IN6_ARE_ADDR_EQUAL(daddr, &c->ip6.gw) &&
- !c->no_map_gw) {
- if (!(udp_tap_map[V6][dst].flags & PORT_LOCAL) ||
- (udp_tap_map[V6][dst].flags & PORT_LOOPBACK))
- s_in6.sin6_addr = in6addr_loopback;
- else if (udp_tap_map[V6][dst].flags & PORT_GUA)
- s_in6.sin6_addr = c->ip6.addr;
- else
- s_in6.sin6_addr = c->ip6.addr_seen;
- } else if (IN6_IS_ADDR_LINKLOCAL(&s_in6.sin6_addr)) {
- bind_addr = &c->ip6.addr_ll;
- }
-
if ((s = udp_tap_map[V6][src].sock) < 0) {
- union udp_epoll_ref uref = {
- .v6 = 1,
- .port = src,
- .pif = PIF_HOST,
- };
- const char *bind_if = NULL;
-
- if (!IN6_IS_ADDR_LOOPBACK(&s_in6.sin6_addr))
- bind_if = c->ip6.ifname_out;
-
- if (!IN6_IS_ADDR_LOOPBACK(&s_in6.sin6_addr) &&
- !IN6_IS_ADDR_LINKLOCAL(&s_in6.sin6_addr))
- bind_addr = &c->ip6.addr_out;
+ uref.v6 = 1;
- s = sock_l4(c, AF_INET6, IPPROTO_UDP, bind_addr,
- bind_if, src, uref.u32);
+ s = flowside_sock_l4(c, IPPROTO_UDP, topif, toside,
+ uref.u32);
if (s < 0)
return p->count - idx;
@@ -1174,7 +1133,7 @@ int udp_tap_handler(struct ctx *c, uint8_t pif,
if (!uh_send)
return p->count - idx;
- mm[i].msg_hdr.msg_name = sa;
+ mm[i].msg_hdr.msg_name = &sa;
mm[i].msg_hdr.msg_namelen = sl;
if (len) {
diff --git a/udp.h b/udp.h
index 5865def2..d25e66cb 100644
--- a/udp.h
+++ b/udp.h
@@ -11,8 +11,8 @@
void udp_portmap_clear(void);
void udp_buf_sock_handler(const struct ctx *c, union epoll_ref ref, uint32_t events,
const struct timespec *now);
-int udp_tap_handler(struct ctx *c, uint8_t pif, sa_family_t af,
- const void *saddr, const void *daddr,
+int udp_tap_handler(const struct ctx *c, uint8_t pif,
+ sa_family_t af, const void *saddr, const void *daddr,
const struct pool *p, int idx, const struct timespec *now);
int udp_sock_init(const struct ctx *c, int ns, sa_family_t af,
const void *addr, const char *ifname, in_port_t port);
--
@@ -11,8 +11,8 @@
void udp_portmap_clear(void);
void udp_buf_sock_handler(const struct ctx *c, union epoll_ref ref, uint32_t events,
const struct timespec *now);
-int udp_tap_handler(struct ctx *c, uint8_t pif, sa_family_t af,
- const void *saddr, const void *daddr,
+int udp_tap_handler(const struct ctx *c, uint8_t pif,
+ sa_family_t af, const void *saddr, const void *daddr,
const struct pool *p, int idx, const struct timespec *now);
int udp_sock_init(const struct ctx *c, int ns, sa_family_t af,
const void *addr, const char *ifname, in_port_t port);
--
2.45.2
next prev parent reply other threads:[~2024-06-14 6:14 UTC|newest]
Thread overview: 35+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-06-14 6:13 [PATCH v6 00/26] RFC: Unified flow table David Gibson
2024-06-14 6:13 ` [PATCH v6 01/26] flow: Common address information for initiating side David Gibson
2024-06-25 22:23 ` Stefano Brivio
2024-06-26 0:19 ` David Gibson
2024-06-14 6:13 ` [PATCH v6 02/26] flow: Common address information for target side David Gibson
2024-06-25 22:23 ` Stefano Brivio
2024-06-26 0:25 ` David Gibson
2024-06-14 6:13 ` [PATCH v6 03/26] tcp, flow: Remove redundant information, repack connection structures David Gibson
2024-06-25 22:25 ` Stefano Brivio
2024-06-26 0:23 ` David Gibson
2024-06-14 6:13 ` [PATCH v6 04/26] tcp: Obtain guest address from flowside David Gibson
2024-06-14 6:13 ` [PATCH v6 05/26] tcp: Manage outbound address via flow table David Gibson
2024-06-14 6:13 ` [PATCH v6 06/26] tcp: Simplify endpoint validation using flowside information David Gibson
2024-06-14 6:13 ` [PATCH v6 07/26] tcp_splice: Eliminate SPLICE_V6 flag David Gibson
2024-06-14 6:13 ` [PATCH v6 08/26] tcp, flow: Replace TCP specific hash function with general flow hash David Gibson
2024-06-14 6:13 ` [PATCH v6 09/26] flow, tcp: Generalise TCP hash table to general flow hash table David Gibson
2024-06-14 6:13 ` [PATCH v6 10/26] tcp: Re-use flow hash for initial sequence number generation David Gibson
2024-06-14 6:13 ` [PATCH v6 11/26] icmp: Remove redundant id field from flow table entry David Gibson
2024-06-14 6:13 ` [PATCH v6 12/26] icmp: Obtain destination addresses from the flowsides David Gibson
2024-06-14 6:13 ` [PATCH v6 13/26] icmp: Look up ping flows using flow hash David Gibson
2024-06-14 6:13 ` [PATCH v6 14/26] icmp: Eliminate icmp_id_map David Gibson
2024-06-14 6:13 ` [PATCH v6 15/26] icmp: Manage outbound socket address via flow table David Gibson
2024-06-14 6:13 ` [PATCH v6 16/26] flow, tcp: Flow based NAT and port forwarding for TCP David Gibson
2024-06-26 22:49 ` Stefano Brivio
2024-06-27 5:55 ` David Gibson
2024-06-14 6:13 ` [PATCH v6 17/26] flow, icmp: Use general flow forwarding rules for ICMP David Gibson
2024-06-14 6:13 ` [PATCH v6 18/26] fwd: Update flow forwarding logic for UDP David Gibson
2024-06-14 6:13 ` [PATCH v6 19/26] udp: Create flow table entries " David Gibson
2024-06-14 6:13 ` David Gibson [this message]
2024-06-14 6:13 ` [PATCH v6 21/26] udp: Direct traffic from host to guest tap according to flow table David Gibson
2024-06-14 6:13 ` [PATCH v6 22/26] udp: Direct spliced traffic " David Gibson
2024-06-14 6:13 ` [PATCH v6 23/26] udp: Remove 'splicesrc' tracking David Gibson
2024-06-14 6:13 ` [PATCH v6 24/26] udp: Remove tap port flags field David Gibson
2024-06-14 6:13 ` [PATCH v6 25/26] udp: Remove rdelta port forwarding maps David Gibson
2024-06-14 6:13 ` [PATCH v6 26/26] udp: Eliminate 'splice' flag from epoll reference David Gibson
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20240614061348.3814736-21-david@gibson.dropbear.id.au \
--to=david@gibson.dropbear.id.au \
--cc=jmaloy@redhat.com \
--cc=passt-dev@passt.top \
--cc=sbrivio@redhat.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
Code repositories for project(s) associated with this public inbox
https://passt.top/passt
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for IMAP folder(s).