From mboxrd@z Thu Jan 1 00:00:00 1970 Received: from mail.ozlabs.org (mail.ozlabs.org [IPv6:2404:9400:2221:ea00::3]) by passt.top (Postfix) with ESMTPS id AE57C5A02DF for ; Tue, 14 May 2024 03:03:49 +0200 (CEST) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gibson.dropbear.id.au; s=202312; t=1715648622; bh=lDpFv+6hZneAXveUk8CZjnIfRVK9N0ULAkBCfTHN3RI=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=Nvt1WW8UxU66ZvemFkUcd4dOyNPvfqCW/XmyDGNoFT6F37n31Gr5mEYGh/sX1ThJd QSilIv8+0RY060STS5gVn6Frm+Klu8NXEMfpUygNwsZLoFMuDQEB6uCv7nNZFIY5/E igdRpgXkuMxWy4OBJmAbQl6VDiPPZlHwZED2tsQiNzrE7RJd46r08FmolS6Elwn6si XkY8/yMZoW/1C/Oy9N7n8VoXQuONOfwaxWiWsx7tEATzizlpUBCzmR3ueV7AnLmXUB XfwHOYuejmfgVDfovHUTd0fCs/xPK5yVpMzmp9AcdwvhZj36Z6tK/tG4zG/F5bvEB6 8B6q1hw04197A== Received: by gandalf.ozlabs.org (Postfix, from userid 1007) id 4VddQk08Lyz4x0v; Tue, 14 May 2024 11:03:42 +1000 (AEST) From: David Gibson To: Stefano Brivio , passt-dev@passt.top Subject: [PATCH v5 07/19] flow: Populate address information for non-initiating side Date: Tue, 14 May 2024 11:03:25 +1000 Message-ID: <20240514010337.1104606-8-david@gibson.dropbear.id.au> X-Mailer: git-send-email 2.45.0 In-Reply-To: <20240514010337.1104606-1-david@gibson.dropbear.id.au> References: <20240514010337.1104606-1-david@gibson.dropbear.id.au> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit Message-ID-Hash: QFEGOMLEM4UJAWDXZEFEIOJ756RVVQ4D X-Message-ID-Hash: QFEGOMLEM4UJAWDXZEFEIOJ756RVVQ4D X-MailFrom: dgibson@gandalf.ozlabs.org X-Mailman-Rule-Misses: dmarc-mitigation; no-senders; approved; emergency; loop; banned-address; member-moderation; nonmember-moderation; administrivia; implicit-dest; max-recipients; max-size; news-moderation; no-subject; digests; suspicious-header CC: David Gibson X-Mailman-Version: 3.3.8 Precedence: list List-Id: Development discussion and patches for passt Archived-At: Archived-At: List-Archive: List-Archive: List-Help: List-Owner: List-Post: List-Subscribe: List-Unsubscribe: This requires the 
address and port information for the forwarded (non initiating) side to be populated when a flow enters FWD state. Implement that for TCP and ICMP. For now this leaves some information redundantly recorded in both generic and type specific fields. We'll fix that in later patches. For TCP we now use the information from the flow to construct the destination socket address in both tcp_conn_from_tap() and tcp_splice_connect(). Signed-off-by: David Gibson --- flow.c | 38 ++++++++++++++++++------ flow_table.h | 5 +++- icmp.c | 2 +- inany.h | 28 ++++++++++++++++++ tcp.c | 83 ++++++++++++++++++++++++++++------------------------ tcp_splice.c | 44 ++++++++++------------------ 6 files changed, 123 insertions(+), 77 deletions(-) diff --git a/flow.c b/flow.c index 3d5b3a5..aff077b 100644 --- a/flow.c +++ b/flow.c @@ -165,8 +165,10 @@ void flow_log_(const struct flow_common *f, int pri, const char *fmt, ...) */ static void flow_set_state(struct flow_common *f, enum flow_state state) { - char estr[INANY_ADDRSTRLEN], fstr[INANY_ADDRSTRLEN]; + char estr0[INANY_ADDRSTRLEN], fstr0[INANY_ADDRSTRLEN]; + char estr1[INANY_ADDRSTRLEN], fstr1[INANY_ADDRSTRLEN]; const struct flowside *ini = &f->side[INISIDE]; + const struct flowside *fwd = &f->side[FWDSIDE]; uint8_t oldstate = f->state; ASSERT(state < FLOW_NUM_STATES); @@ -177,19 +179,24 @@ static void flow_set_state(struct flow_common *f, enum flow_state state) FLOW_STATE(f)); if (MAX(state, oldstate) >= FLOW_STATE_FWD) - flow_log_(f, LOG_DEBUG, "%s [%s]:%hu -> [%s]:%hu => %s", + flow_log_(f, LOG_DEBUG, + "%s [%s]:%hu -> [%s]:%hu => %s [%s]:%hu -> [%s]:%hu", pif_name(f->pif[INISIDE]), - inany_ntop(&ini->eaddr, estr, sizeof(estr)), + inany_ntop(&ini->eaddr, estr0, sizeof(estr0)), ini->eport, - inany_ntop(&ini->faddr, fstr, sizeof(fstr)), + inany_ntop(&ini->faddr, fstr0, sizeof(fstr0)), ini->fport, - pif_name(f->pif[FWDSIDE])); + pif_name(f->pif[FWDSIDE]), + inany_ntop(&fwd->faddr, fstr1, sizeof(fstr1)), + ini->fport, + 
inany_ntop(&fwd->eaddr, estr1, sizeof(estr1)), + ini->eport); else if (MAX(state, oldstate) >= FLOW_STATE_INI) flow_log_(f, LOG_DEBUG, "%s [%s]:%hu -> [%s]:%hu => ?", pif_name(f->pif[INISIDE]), - inany_ntop(&ini->eaddr, estr, sizeof(estr)), + inany_ntop(&ini->eaddr, estr0, sizeof(estr0)), ini->eport, - inany_ntop(&ini->faddr, fstr, sizeof(fstr)), + inany_ntop(&ini->faddr, fstr0, sizeof(fstr0)), ini->fport); } @@ -257,21 +264,34 @@ const struct flowside *flow_initiate_sa(union flow *flow, uint8_t pif, } /** - * flow_forward() - Move flow to FWD state, setting FWDSIDE details + * flow_forward_af() - Move flow to FWD state, setting FWDSIDE details * @flow: Flow to change state * @pif: pif of the forwarded side + * @af: Address family for @saddr and @daddr + * @saddr: Source address (pointer to in_addr or in6_addr) + * @sport: Source port + * @daddr: Destination address (pointer to in_addr or in6_addr) + * @dport: Destination port + * + * Return: pointer to the forwarded flowside information */ -void flow_forward(union flow *flow, uint8_t pif) +const struct flowside *flow_forward_af(union flow *flow, uint8_t pif, + sa_family_t af, + const void *saddr, in_port_t sport, + const void *daddr, in_port_t dport) { struct flow_common *f = &flow->f; + struct flowside *fwd = &f->side[FWDSIDE]; ASSERT(pif != PIF_NONE); ASSERT(flow_new_entry == flow && f->state == FLOW_STATE_INI); ASSERT(f->type == FLOW_TYPE_NONE); ASSERT(f->pif[INISIDE] != PIF_NONE && f->pif[FWDSIDE] == PIF_NONE); + flowside_from_af(fwd, af, daddr, dport, saddr, sport); f->pif[FWDSIDE] = pif; flow_set_state(f, FLOW_STATE_FWD); + return fwd; } /** diff --git a/flow_table.h b/flow_table.h index ca7f228..91ade0a 100644 --- a/flow_table.h +++ b/flow_table.h @@ -114,7 +114,10 @@ const struct flowside *flow_initiate_af(union flow *flow, uint8_t pif, const struct flowside *flow_initiate_sa(union flow *flow, uint8_t pif, const union sockaddr_inany *ssa, in_port_t dport); -void flow_forward(union flow *flow, uint8_t 
pif); +const struct flowside *flow_forward_af(union flow *flow, uint8_t pif, + sa_family_t af, + const void *saddr, in_port_t sport, + const void *daddr, in_port_t dport); union flow *flow_set_type(union flow *flow, enum flow_type type); #define FLOW_SET_TYPE(flow_, t_, var_) (&flow_set_type((flow_), (t_))->var_) diff --git a/icmp.c b/icmp.c index 90708fe..37a3586 100644 --- a/icmp.c +++ b/icmp.c @@ -168,7 +168,7 @@ static struct icmp_ping_flow *icmp_ping_new(const struct ctx *c, flow_initiate_af(flow, PIF_TAP, af, saddr, id, daddr, id); - flow_forward(flow, PIF_HOST); + flow_forward_af(flow, PIF_HOST, af, NULL, 0, daddr, 0); pingf = FLOW_SET_TYPE(flow, flowtype, ping); pingf->seq = -1; diff --git a/inany.h b/inany.h index 407690e..d962ff3 100644 --- a/inany.h +++ b/inany.h @@ -184,4 +184,32 @@ static inline void inany_siphash_feed(struct siphash_state *state, const char *inany_ntop(const union inany_addr *src, char *dst, socklen_t size); +/** sockaddr_from_inany - Construct a sockaddr from an inany + * @sa: Pointer to sockaddr to fill in + * @sl: Updated to the length of the initialised part of @sa + * @addr: IPv[46] address + * @port: Port (host byte order) + * @scope: Scope ID (ignored for IPv4 addresses) + */ +static inline void sockaddr_from_inany(union sockaddr_inany *sa, socklen_t *sl, + const union inany_addr *addr, + in_port_t port, uint32_t scope) +{ + const struct in_addr *v4 = inany_v4(addr); + + if (v4) { + sa->sa_family = AF_INET; + sa->sa4.sin_addr = *v4; + sa->sa4.sin_port = htons(port); + *sl = sizeof(sa->sa4); + } else { + sa->sa_family = AF_INET6; + sa->sa6.sin6_addr = addr->a6; + sa->sa6.sin6_port = htons(port); + sa->sa6.sin6_scope_id = scope; + sa->sa6.sin6_flowinfo = 0; + *sl = sizeof(sa->sa6); + } +} + #endif /* INANY_H */ diff --git a/tcp.c b/tcp.c index bcc36fb..5fb3ce9 100644 --- a/tcp.c +++ b/tcp.c @@ -1933,18 +1933,10 @@ static void tcp_conn_from_tap(struct ctx *c, sa_family_t af, { in_port_t srcport = ntohs(th->source); in_port_t dstport = 
ntohs(th->dest); - struct sockaddr_in addr4 = { - .sin_family = AF_INET, - .sin_port = htons(dstport), - .sin_addr = *(struct in_addr *)daddr, - }; - struct sockaddr_in6 addr6 = { - .sin6_family = AF_INET6, - .sin6_port = htons(dstport), - .sin6_addr = *(struct in6_addr *)daddr, - }; - const struct sockaddr *sa; + const struct flowside *ini, *fwd; struct tcp_tap_conn *conn; + union inany_addr dstaddr; /* FIXME: Avoid bulky temporary */ + union sockaddr_inany sa; union flow *flow; int s = -1, mss; socklen_t sl; @@ -1952,7 +1944,8 @@ static void tcp_conn_from_tap(struct ctx *c, sa_family_t af, if (!(flow = flow_alloc())) return; - flow_initiate_af(flow, PIF_TAP, af, saddr, srcport, daddr, dstport); + ini = flow_initiate_af(flow, PIF_TAP, + af, saddr, srcport, daddr, dstport); if (af == AF_INET) { if (IN4_IS_ADDR_UNSPECIFIED(saddr) || @@ -1984,19 +1977,28 @@ static void tcp_conn_from_tap(struct ctx *c, sa_family_t af, dstport); goto cancel; } + } else { + ASSERT(0); } if ((s = tcp_conn_sock(c, af)) < 0) goto cancel; + dstaddr = ini->faddr; + if (!c->no_map_gw) { - if (af == AF_INET && IN4_ARE_ADDR_EQUAL(daddr, &c->ip4.gw)) - addr4.sin_addr.s_addr = htonl(INADDR_LOOPBACK); - if (af == AF_INET6 && IN6_ARE_ADDR_EQUAL(daddr, &c->ip6.gw)) - addr6.sin6_addr = in6addr_loopback; + struct in_addr *v4 = inany_v4(&dstaddr); + + if (v4 && IN4_ARE_ADDR_EQUAL(v4, &c->ip4.gw)) + *v4 = in4addr_loopback; + if (IN6_ARE_ADDR_EQUAL(&dstaddr, &c->ip6.gw)) + dstaddr.a6 = in6addr_loopback; } - if (af == AF_INET6 && IN6_IS_ADDR_LINKLOCAL(&addr6.sin6_addr)) { + fwd = flow_forward_af(flow, PIF_HOST, AF_INET6, + &inany_any6, srcport, &dstaddr, dstport); + + if (IN6_IS_ADDR_LINKLOCAL(&fwd->eaddr)) { struct sockaddr_in6 addr6_ll = { .sin6_family = AF_INET6, .sin6_addr = c->ip6.addr_ll, @@ -2004,9 +2006,10 @@ static void tcp_conn_from_tap(struct ctx *c, sa_family_t af, }; if (bind(s, (struct sockaddr *)&addr6_ll, sizeof(addr6_ll))) goto cancel; + } else if (!inany_is_loopback(&fwd->eaddr)) { + 
tcp_bind_outbound(c, s, af); } - flow_forward(flow, PIF_HOST); conn = FLOW_SET_TYPE(flow, FLOW_TCP, tcp); conn->sock = s; conn->timer = -1; @@ -2029,14 +2032,6 @@ static void tcp_conn_from_tap(struct ctx *c, sa_family_t af, inany_from_af(&conn->faddr, af, daddr); - if (af == AF_INET) { - sa = (struct sockaddr *)&addr4; - sl = sizeof(addr4); - } else { - sa = (struct sockaddr *)&addr6; - sl = sizeof(addr6); - } - conn->fport = dstport; conn->eport = srcport; @@ -2049,19 +2044,16 @@ static void tcp_conn_from_tap(struct ctx *c, sa_family_t af, tcp_hash_insert(c, conn); - if (!bind(s, sa, sl)) { + sockaddr_from_inany(&sa, &sl, &fwd->eaddr, fwd->eport, c->ifi6); + + if (!bind(s, &sa.sa, sl)) { tcp_rst(c, conn); /* Nobody is listening then */ return; } if (errno != EADDRNOTAVAIL && errno != EACCES) conn_flag(c, conn, LOCAL); - if ((af == AF_INET && !IN4_IS_ADDR_LOOPBACK(&addr4.sin_addr)) || - (af == AF_INET6 && !IN6_IS_ADDR_LOOPBACK(&addr6.sin6_addr) && - !IN6_IS_ADDR_LINKLOCAL(&addr6.sin6_addr))) - tcp_bind_outbound(c, s, af); - - if (connect(s, sa, sl)) { + if (connect(s, &sa.sa, sl)) { if (errno != EINPROGRESS) { tcp_rst(c, conn); return; @@ -2726,9 +2718,25 @@ static void tcp_tap_conn_from_sock(struct ctx *c, in_port_t dstport, const union sockaddr_inany *sa, const struct timespec *now) { + union inany_addr saddr, daddr; /* FIXME: avoid bulky temporaries */ struct tcp_tap_conn *conn; + in_port_t srcport; + + inany_from_sockaddr(&saddr, &srcport, sa); + tcp_snat_inbound(c, &saddr); - flow_forward(flow, PIF_TAP); + if (inany_v4(&saddr)) { + inany_from_af(&daddr, AF_INET, &c->ip4.addr_seen); + } else { + if (IN6_IS_ADDR_LINKLOCAL(&saddr)) + daddr.a6 = c->ip6.addr_ll_seen; + else + daddr.a6 = c->ip6.addr_seen; + } + dstport += c->tcp.fwd_in.delta[dstport]; + + flow_forward_af(flow, PIF_TAP, AF_INET6, + &saddr, srcport, &daddr, dstport); conn = FLOW_SET_TYPE(flow, FLOW_TCP, tcp); conn->sock = s; @@ -2736,10 +2744,9 @@ static void tcp_tap_conn_from_sock(struct ctx *c, 
in_port_t dstport, conn->ws_to_tap = conn->ws_from_tap = 0; conn_event(c, conn, SOCK_ACCEPTED); - inany_from_sockaddr(&conn->faddr, &conn->fport, sa); - conn->eport = dstport + c->tcp.fwd_in.delta[dstport]; - - tcp_snat_inbound(c, &conn->faddr); + conn->faddr = saddr; + conn->fport = srcport; + conn->eport = dstport; tcp_seq_init(c, conn, now); tcp_hash_insert(c, conn); diff --git a/tcp_splice.c b/tcp_splice.c index 0e02732..3a20b40 100644 --- a/tcp_splice.c +++ b/tcp_splice.c @@ -321,31 +321,20 @@ static int tcp_splice_connect_finish(const struct ctx *c, * tcp_splice_connect() - Create and connect socket for new spliced connection * @c: Execution context * @conn: Connection pointer - * @af: Address family - * @pif: pif on which to create socket - * @port: Destination port, host order * * Return: 0 for connect() succeeded or in progress, negative value on error */ -static int tcp_splice_connect(const struct ctx *c, struct tcp_splice_conn *conn, - sa_family_t af, uint8_t pif, in_port_t port) +static int tcp_splice_connect(const struct ctx *c, struct tcp_splice_conn *conn) { - struct sockaddr_in6 addr6 = { - .sin6_family = AF_INET6, - .sin6_port = htons(port), - .sin6_addr = IN6ADDR_LOOPBACK_INIT, - }; - struct sockaddr_in addr4 = { - .sin_family = AF_INET, - .sin_port = htons(port), - .sin_addr = IN4ADDR_LOOPBACK_INIT, - }; - const struct sockaddr *sa; + const struct flowside *fwd = &conn->f.side[FWDSIDE]; + sa_family_t af = inany_v4(&fwd->eaddr) ? 
AF_INET : AF_INET6; + uint8_t pif1 = conn->f.pif[FWDSIDE]; + union sockaddr_inany sa; socklen_t sl; - if (pif == PIF_HOST) + if (pif1 == PIF_HOST) conn->s[1] = tcp_conn_sock(c, af); - else if (pif == PIF_SPLICE) + else if (pif1 == PIF_SPLICE) conn->s[1] = tcp_conn_sock_ns(c, af); else ASSERT(0); @@ -359,15 +348,9 @@ static int tcp_splice_connect(const struct ctx *c, struct tcp_splice_conn *conn, conn->s[1]); } - if (CONN_V6(conn)) { - sa = (struct sockaddr *)&addr6; - sl = sizeof(addr6); - } else { - sa = (struct sockaddr *)&addr4; - sl = sizeof(addr4); - } + sockaddr_from_inany(&sa, &sl, &fwd->eaddr, fwd->eport, 0); - if (connect(conn->s[1], sa, sl)) { + if (connect(conn->s[1], &sa.sa, sl)) { if (errno != EINPROGRESS) { flow_trace(conn, "Couldn't connect socket for splice: %s", strerror(errno)); @@ -472,7 +455,12 @@ bool tcp_splice_conn_from_sock(const struct ctx *c, return false; } - flow_forward(flow, pif1); + if (af == AF_INET) + flow_forward_af(flow, pif1, AF_INET, + NULL, 0, &in4addr_loopback, dstport); + else + flow_forward_af(flow, pif1, AF_INET6, + NULL, 0, &in6addr_loopback, dstport); conn = FLOW_SET_TYPE(flow, FLOW_TCP_SPLICE, tcp_splice); conn->flags = af == AF_INET ? 0 : SPLICE_V6; @@ -484,7 +472,7 @@ bool tcp_splice_conn_from_sock(const struct ctx *c, if (setsockopt(s0, SOL_TCP, TCP_QUICKACK, &((int){ 1 }), sizeof(int))) flow_trace(conn, "failed to set TCP_QUICKACK on %i", s0); - if (tcp_splice_connect(c, conn, af, pif1, dstport)) + if (tcp_splice_connect(c, conn)) conn_flag(c, conn, CLOSING); FLOW_ACTIVATE(conn); -- 2.45.0