From mboxrd@z Thu Jan 1 00:00:00 1970 Received: from mail.ozlabs.org (mail.ozlabs.org [IPv6:2404:9400:2221:ea00::3]) by passt.top (Postfix) with ESMTPS id EF4AE5A0050 for ; Fri, 14 Jun 2024 08:13:56 +0200 (CEST) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gibson.dropbear.id.au; s=202312; t=1718345631; bh=AEjrgVbUTZsNxTWrPtHWytrtkYcSYR3IWq3K4swJV8g=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=hf8qJAaVdA6T9V17yJjXSHjdFHvX/g2JgSBcFfdo2o/b4XW9hqcXOwMAAnMqAgY+S NBOiJxMo/vYZYufs0qD3Yghci10wjqhw8+dtaMtgmQ58CGk9e9Khr/pF4vi8u+hxNg 58mAdNrcYZ9X4DVjlxAV13zyV/0jcEmeMrdlUINM5FdjJl04a1Ta8fF9pQqqx5m1rC Zw3SKV++qxcnsBezzLvYLY50MXfn8v0SpK6CLdc4hZQ+4ANpB3LscC5ABz20hexWAv Eg7cA6l+445FE2EAI8otClWF2SSzvlUoE3arTLnnMdFzr15dKYGMx1Z/VcuUblY/QQ jZ6ArE96OPxdg== Received: by gandalf.ozlabs.org (Postfix, from userid 1007) id 4W0prH71xnz4wb2; Fri, 14 Jun 2024 16:13:51 +1000 (AEST) From: David Gibson To: Stefano Brivio , passt-dev@passt.top Subject: [PATCH v6 02/26] flow: Common address information for target side Date: Fri, 14 Jun 2024 16:13:24 +1000 Message-ID: <20240614061348.3814736-3-david@gibson.dropbear.id.au> X-Mailer: git-send-email 2.45.2 In-Reply-To: <20240614061348.3814736-1-david@gibson.dropbear.id.au> References: <20240614061348.3814736-1-david@gibson.dropbear.id.au> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit Message-ID-Hash: EVAVHHWIURYSRIDQVHQQ2E5NC2YLXSYE X-Message-ID-Hash: EVAVHHWIURYSRIDQVHQQ2E5NC2YLXSYE X-MailFrom: dgibson@gandalf.ozlabs.org X-Mailman-Rule-Misses: dmarc-mitigation; no-senders; approved; emergency; loop; banned-address; member-moderation; nonmember-moderation; administrivia; implicit-dest; max-recipients; max-size; news-moderation; no-subject; digests; suspicious-header CC: jmaloy@redhat.com, David Gibson X-Mailman-Version: 3.3.8 Precedence: list List-Id: Development discussion and patches for passt Archived-At: Archived-At: List-Archive: List-Archive: List-Help: List-Owner: List-Post: List-Subscribe: List-Unsubscribe: Require the address and port information for the target (non initiating) side to be populated when a flow enters TGT state. Implement that for TCP and ICMP. For now this leaves some information redundantly recorded in both generic and type specific fields. We'll fix that in later patches. For TCP we now use the information from the flow to construct the destination socket address in both tcp_conn_from_tap() and tcp_splice_connect(). Signed-off-by: David Gibson --- flow.c | 38 ++++++++++++++++++------ flow_table.h | 5 +++- icmp.c | 3 +- inany.h | 30 ++++++++++++++++++- tcp.c | 83 ++++++++++++++++++++++++++++------------------------ tcp_splice.c | 45 +++++++++++----------------- 6 files changed, 126 insertions(+), 78 deletions(-) diff --git a/flow.c b/flow.c index 1819111d..39e046bf 100644 --- a/flow.c +++ b/flow.c @@ -165,8 +165,10 @@ void flow_log_(const struct flow_common *f, int pri, const char *fmt, ...) */ static void flow_set_state(struct flow_common *f, enum flow_state state) { - char estr[INANY_ADDRSTRLEN], fstr[INANY_ADDRSTRLEN]; + char estr0[INANY_ADDRSTRLEN], fstr0[INANY_ADDRSTRLEN]; + char estr1[INANY_ADDRSTRLEN], fstr1[INANY_ADDRSTRLEN]; const struct flowside *ini = &f->side[INISIDE]; + const struct flowside *tgt = &f->side[TGTSIDE]; uint8_t oldstate = f->state; ASSERT(state < FLOW_NUM_STATES); @@ -177,19 +179,24 @@ static void flow_set_state(struct flow_common *f, enum flow_state state) FLOW_STATE(f)); if (MAX(state, oldstate) >= FLOW_STATE_TGT) - flow_log_(f, LOG_DEBUG, "%s [%s]:%hu -> [%s]:%hu => %s", + flow_log_(f, LOG_DEBUG, + "%s [%s]:%hu -> [%s]:%hu => %s [%s]:%hu -> [%s]:%hu", pif_name(f->pif[INISIDE]), - inany_ntop(&ini->eaddr, estr, sizeof(estr)), + inany_ntop(&ini->eaddr, estr0, sizeof(estr0)), ini->eport, - inany_ntop(&ini->faddr, fstr, sizeof(fstr)), + inany_ntop(&ini->faddr, fstr0, sizeof(fstr0)), ini->fport, - pif_name(f->pif[TGTSIDE])); + pif_name(f->pif[TGTSIDE]), + inany_ntop(&tgt->faddr, fstr1, sizeof(fstr1)), + tgt->fport, + inany_ntop(&tgt->eaddr, estr1, sizeof(estr1)), + tgt->eport); else if (MAX(state, oldstate) >= FLOW_STATE_INI) flow_log_(f, LOG_DEBUG, "%s [%s]:%hu -> [%s]:%hu => ?", pif_name(f->pif[INISIDE]), - inany_ntop(&ini->eaddr, estr, sizeof(estr)), + inany_ntop(&ini->eaddr, estr0, sizeof(estr0)), ini->eport, - inany_ntop(&ini->faddr, fstr, sizeof(fstr)), + inany_ntop(&ini->faddr, fstr0, sizeof(fstr0)), ini->fport); } @@ -261,21 +268,34 @@ const struct flowside *flow_initiate_sa(union flow *flow, uint8_t pif, } /** - * flow_target() - Move flow to TGT, setting TGTSIDE details + * flow_target_af() - Move flow to TGT, setting TGTSIDE details * @flow: Flow to change state * @pif: pif of the target side + * @af: Address family for @eaddr and @faddr + * @saddr: Source address (pointer to in_addr or in6_addr) + * @sport: Endpoint port + * @daddr: Destination address (pointer to in_addr or in6_addr) + * @dport: Destination port + * + * Return: pointer to the target flowside information */ -void flow_target(union flow *flow, uint8_t pif) +const struct flowside *flow_target_af(union flow *flow, uint8_t pif, + sa_family_t af, + const void *saddr, in_port_t sport, + const void *daddr, in_port_t dport) { struct flow_common *f = &flow->f; + struct flowside *tgt = &f->side[TGTSIDE]; ASSERT(pif != PIF_NONE); ASSERT(flow_new_entry == flow && f->state == FLOW_STATE_INI); ASSERT(f->type == FLOW_TYPE_NONE); ASSERT(f->pif[INISIDE] != PIF_NONE && f->pif[TGTSIDE] == PIF_NONE); + flowside_from_af(tgt, af, daddr, dport, saddr, sport); f->pif[TGTSIDE] = pif; flow_set_state(f, FLOW_STATE_TGT); + return tgt; } /** diff --git a/flow_table.h b/flow_table.h index 2e912532..7af32c6a 100644 --- a/flow_table.h +++ b/flow_table.h @@ -114,7 +114,10 @@ const struct flowside *flow_initiate_af(union flow *flow, uint8_t pif, const struct flowside *flow_initiate_sa(union flow *flow, uint8_t pif, const union sockaddr_inany *ssa, in_port_t dport); -void flow_target(union flow *flow, uint8_t pif); +const struct flowside *flow_target_af(union flow *flow, uint8_t pif, + sa_family_t af, + const void *saddr, in_port_t sport, + const void *daddr, in_port_t dport); union flow *flow_set_type(union flow *flow, enum flow_type type); #define FLOW_SET_TYPE(flow_, t_, var_) (&flow_set_type((flow_), (t_))->var_) diff --git a/icmp.c b/icmp.c index 38d1deeb..c86c54c3 100644 --- a/icmp.c +++ b/icmp.c @@ -167,7 +167,8 @@ static struct icmp_ping_flow *icmp_ping_new(const struct ctx *c, return NULL; flow_initiate_af(flow, PIF_TAP, af, saddr, id, daddr, id); - flow_target(flow, PIF_HOST); + /* FIXME: Record outbound source address when known */ + flow_target_af(flow, PIF_HOST, af, NULL, 0, daddr, 0); pingf = FLOW_SET_TYPE(flow, flowtype, ping); pingf->seq = -1; diff --git a/inany.h b/inany.h index 47b66fa9..2bf3becf 100644 --- a/inany.h +++ b/inany.h @@ -187,7 +187,6 @@ static inline bool inany_is_unspecified(const union inany_addr *a) * * Return: true if @a is in fe80::/10 (IPv6 link local unicast) */ -/* cppcheck-suppress unusedFunction */ static inline bool inany_is_linklocal6(const union inany_addr *a) { return IN6_IS_ADDR_LINKLOCAL(&a->a6); @@ -273,4 +272,33 @@ static inline void inany_siphash_feed(struct siphash_state *state, const char *inany_ntop(const union inany_addr *src, char *dst, socklen_t size); +/** sockaddr_from_inany() - Construct a sockaddr from an inany + * @sa: Pointer to sockaddr to fill in + * @sl: Updated to relevant of length of initialised @sa + * @addr: IPv[46] address + * @port: Port (host byte order) + * @scope: Scope ID (ignored for IPv4 addresses) + */ +static inline void sockaddr_from_inany(union sockaddr_inany *sa, socklen_t *sl, + const union inany_addr *addr, + in_port_t port, uint32_t scope) +{ + const struct in_addr *v4 = inany_v4(addr); + + if (v4) { + sa->sa_family = AF_INET; + sa->sa4.sin_addr = *v4; + sa->sa4.sin_port = htons(port); + memset(&sa->sa4.sin_zero, 0, sizeof(sa->sa4.sin_zero)); + *sl = sizeof(sa->sa4); + } else { + sa->sa_family = AF_INET6; + sa->sa6.sin6_addr = addr->a6; + sa->sa6.sin6_port = htons(port); + sa->sa6.sin6_scope_id = scope; + sa->sa6.sin6_flowinfo = 0; + *sl = sizeof(sa->sa6); + } +} + #endif /* INANY_H */ diff --git a/tcp.c b/tcp.c index 07a2eb1c..c6cd0c72 100644 --- a/tcp.c +++ b/tcp.c @@ -1610,18 +1610,10 @@ static void tcp_conn_from_tap(struct ctx *c, sa_family_t af, { in_port_t srcport = ntohs(th->source); in_port_t dstport = ntohs(th->dest); - struct sockaddr_in addr4 = { - .sin_family = AF_INET, - .sin_port = htons(dstport), - .sin_addr = *(struct in_addr *)daddr, - }; - struct sockaddr_in6 addr6 = { - .sin6_family = AF_INET6, - .sin6_port = htons(dstport), - .sin6_addr = *(struct in6_addr *)daddr, - }; - const struct sockaddr *sa; + const struct flowside *ini, *tgt; struct tcp_tap_conn *conn; + union inany_addr dstaddr; /* FIXME: Avoid bulky temporary */ + union sockaddr_inany sa; union flow *flow; int s = -1, mss; socklen_t sl; @@ -1629,7 +1621,8 @@ static void tcp_conn_from_tap(struct ctx *c, sa_family_t af, if (!(flow = flow_alloc())) return; - flow_initiate_af(flow, PIF_TAP, af, saddr, srcport, daddr, dstport); + ini = flow_initiate_af(flow, PIF_TAP, + af, saddr, srcport, daddr, dstport); if (af == AF_INET) { if (IN4_IS_ADDR_UNSPECIFIED(saddr) || @@ -1661,19 +1654,28 @@ static void tcp_conn_from_tap(struct ctx *c, sa_family_t af, dstport); goto cancel; } + } else { + ASSERT(0); } if ((s = tcp_conn_sock(c, af)) < 0) goto cancel; + dstaddr = ini->faddr; + if (!c->no_map_gw) { - if (af == AF_INET && IN4_ARE_ADDR_EQUAL(daddr, &c->ip4.gw)) - addr4.sin_addr.s_addr = htonl(INADDR_LOOPBACK); - if (af == AF_INET6 && IN6_ARE_ADDR_EQUAL(daddr, &c->ip6.gw)) - addr6.sin6_addr = in6addr_loopback; + if (inany_equals4(&dstaddr, &c->ip4.gw)) + dstaddr = inany_loopback4; + else if (inany_equals6(&dstaddr, &c->ip6.gw)) + dstaddr = inany_loopback6; } - if (af == AF_INET6 && IN6_IS_ADDR_LINKLOCAL(&addr6.sin6_addr)) { + /* FIXME: Record outbound source address when known */ + tgt = flow_target_af(flow, PIF_HOST, AF_INET6, + NULL, 0, /* Kernel decides source address */ + &dstaddr, dstport); + + if (inany_is_linklocal6(&tgt->eaddr)) { struct sockaddr_in6 addr6_ll = { .sin6_family = AF_INET6, .sin6_addr = c->ip6.addr_ll, @@ -1681,9 +1683,10 @@ static void tcp_conn_from_tap(struct ctx *c, sa_family_t af, }; if (bind(s, (struct sockaddr *)&addr6_ll, sizeof(addr6_ll))) goto cancel; + } else if (!inany_is_loopback(&tgt->eaddr)) { + tcp_bind_outbound(c, s, af); } - flow_target(flow, PIF_HOST); conn = FLOW_SET_TYPE(flow, FLOW_TCP, tcp); conn->sock = s; conn->timer = -1; @@ -1706,14 +1709,6 @@ static void tcp_conn_from_tap(struct ctx *c, sa_family_t af, inany_from_af(&conn->faddr, af, daddr); - if (af == AF_INET) { - sa = (struct sockaddr *)&addr4; - sl = sizeof(addr4); - } else { - sa = (struct sockaddr *)&addr6; - sl = sizeof(addr6); - } - conn->fport = dstport; conn->eport = srcport; @@ -1726,19 +1721,16 @@ static void tcp_conn_from_tap(struct ctx *c, sa_family_t af, tcp_hash_insert(c, conn); - if (!bind(s, sa, sl)) { + sockaddr_from_inany(&sa, &sl, &tgt->eaddr, tgt->eport, c->ifi6); + + if (!bind(s, &sa.sa, sl)) { tcp_rst(c, conn); /* Nobody is listening then */ goto cancel; } if (errno != EADDRNOTAVAIL && errno != EACCES) conn_flag(c, conn, LOCAL); - if ((af == AF_INET && !IN4_IS_ADDR_LOOPBACK(&addr4.sin_addr)) || - (af == AF_INET6 && !IN6_IS_ADDR_LOOPBACK(&addr6.sin6_addr) && - !IN6_IS_ADDR_LINKLOCAL(&addr6.sin6_addr))) - tcp_bind_outbound(c, s, af); - - if (connect(s, sa, sl)) { + if (connect(s, &sa.sa, sl)) { if (errno != EINPROGRESS) { tcp_rst(c, conn); goto cancel; @@ -2236,9 +2228,25 @@ static void tcp_tap_conn_from_sock(struct ctx *c, in_port_t dstport, const union sockaddr_inany *sa, const struct timespec *now) { + union inany_addr saddr, daddr; /* FIXME: avoid bulky temporaries */ struct tcp_tap_conn *conn; + in_port_t srcport; + + inany_from_sockaddr(&saddr, &srcport, sa); + tcp_snat_inbound(c, &saddr); - flow_target(flow, PIF_TAP); + if (inany_v4(&saddr)) { + daddr = inany_from_v4(c->ip4.addr_seen); + } else { + if (inany_is_linklocal6(&saddr)) + daddr.a6 = c->ip6.addr_ll_seen; + else + daddr.a6 = c->ip6.addr_seen; + } + dstport += c->tcp.fwd_in.delta[dstport]; + + flow_target_af(flow, PIF_TAP, AF_INET6, + &saddr, srcport, &daddr, dstport); conn = FLOW_SET_TYPE(flow, FLOW_TCP, tcp); conn->sock = s; @@ -2246,10 +2254,9 @@ static void tcp_tap_conn_from_sock(struct ctx *c, in_port_t dstport, conn->ws_to_tap = conn->ws_from_tap = 0; conn_event(c, conn, SOCK_ACCEPTED); - inany_from_sockaddr(&conn->faddr, &conn->fport, sa); - conn->eport = dstport + c->tcp.fwd_in.delta[dstport]; - - tcp_snat_inbound(c, &conn->faddr); + conn->faddr = saddr; + conn->fport = srcport; + conn->eport = dstport; tcp_seq_init(c, conn, now); tcp_hash_insert(c, conn); diff --git a/tcp_splice.c b/tcp_splice.c index 5a406c63..bcb42c97 100644 --- a/tcp_splice.c +++ b/tcp_splice.c @@ -320,31 +320,20 @@ static int tcp_splice_connect_finish(const struct ctx *c, * tcp_splice_connect() - Create and connect socket for new spliced connection * @c: Execution context * @conn: Connection pointer - * @af: Address family - * @pif: pif on which to create socket - * @port: Destination port, host order * * Return: 0 for connect() succeeded or in progress, negative value on error */ -static int tcp_splice_connect(const struct ctx *c, struct tcp_splice_conn *conn, - sa_family_t af, uint8_t pif, in_port_t port) +static int tcp_splice_connect(const struct ctx *c, struct tcp_splice_conn *conn) { - struct sockaddr_in6 addr6 = { - .sin6_family = AF_INET6, - .sin6_port = htons(port), - .sin6_addr = IN6ADDR_LOOPBACK_INIT, - }; - struct sockaddr_in addr4 = { - .sin_family = AF_INET, - .sin_port = htons(port), - .sin_addr = IN4ADDR_LOOPBACK_INIT, - }; - const struct sockaddr *sa; + const struct flowside *tgt = &conn->f.side[TGTSIDE]; + sa_family_t af = inany_v4(&tgt->eaddr) ? AF_INET : AF_INET6; + uint8_t tgtpif = conn->f.pif[TGTSIDE]; + union sockaddr_inany sa; socklen_t sl; - if (pif == PIF_HOST) + if (tgtpif == PIF_HOST) conn->s[1] = tcp_conn_sock(c, af); - else if (pif == PIF_SPLICE) + else if (tgtpif == PIF_SPLICE) conn->s[1] = tcp_conn_sock_ns(c, af); else ASSERT(0); @@ -358,15 +347,9 @@ static int tcp_splice_connect(const struct ctx *c, struct tcp_splice_conn *conn, conn->s[1]); } - if (CONN_V6(conn)) { - sa = (struct sockaddr *)&addr6; - sl = sizeof(addr6); - } else { - sa = (struct sockaddr *)&addr4; - sl = sizeof(addr4); - } + sockaddr_from_inany(&sa, &sl, &tgt->eaddr, tgt->eport, 0); - if (connect(conn->s[1], sa, sl)) { + if (connect(conn->s[1], &sa.sa, sl)) { if (errno != EINPROGRESS) { flow_trace(conn, "Couldn't connect socket for splice: %s", strerror(errno)); @@ -471,7 +454,13 @@ bool tcp_splice_conn_from_sock(const struct ctx *c, return false; } - flow_target(flow, tgtpif); + /* FIXME: Record outbound source address when known */ + if (af == AF_INET) + flow_target_af(flow, tgtpif, AF_INET, + NULL, 0, &in4addr_loopback, dstport); + else + flow_target_af(flow, tgtpif, AF_INET6, + NULL, 0, &in6addr_loopback, dstport); conn = FLOW_SET_TYPE(flow, FLOW_TCP_SPLICE, tcp_splice); conn->flags = af == AF_INET ? 0 : SPLICE_V6; @@ -483,7 +472,7 @@ bool tcp_splice_conn_from_sock(const struct ctx *c, if (setsockopt(s0, SOL_TCP, TCP_QUICKACK, &((int){ 1 }), sizeof(int))) flow_trace(conn, "failed to set TCP_QUICKACK on %i", s0); - if (tcp_splice_connect(c, conn, af, tgtpif, dstport)) + if (tcp_splice_connect(c, conn)) conn_flag(c, conn, CLOSING); FLOW_ACTIVATE(conn); -- 2.45.2