From mboxrd@z Thu Jan 1 00:00:00 1970 Received: from gandalf.ozlabs.org (gandalf.ozlabs.org [150.107.74.76]) by passt.top (Postfix) with ESMTPS id 739675A027F for ; Mon, 28 Aug 2023 07:42:12 +0200 (CEST) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gibson.dropbear.id.au; s=201602; t=1693201322; bh=ssR88zPi9DCWmGm6c9oSutIUMDQvPA2KSeKVpNlTSGo=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=f0WZBhlk4fsY49VteztcoWdlPxXF2VtXhIIYGWnBiCDORYVgmEgGk1nPLFesHkfRM bgpvK/dKgrHpdxmt+vFLfinod0FwCwvQu+O2O0MoG8Mbcicuf+eQHCN5+FZ95EJQzM yac612f0txpIYJjWq0rzKj4NiKSuhxuj97N5vxOc= Received: by gandalf.ozlabs.org (Postfix, from userid 1007) id 4RYzvt5y07z4wy1; Mon, 28 Aug 2023 15:42:02 +1000 (AEST) From: David Gibson To: passt-dev@passt.top, Stefano Brivio Subject: [PATCH v2 06/10] tcp: Move guest side address tracking to flow/flowside Date: Mon, 28 Aug 2023 15:41:42 +1000 Message-ID: <20230828054146.48673-7-david@gibson.dropbear.id.au> X-Mailer: git-send-email 2.41.0 In-Reply-To: <20230828054146.48673-1-david@gibson.dropbear.id.au> References: <20230828054146.48673-1-david@gibson.dropbear.id.au> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit Message-ID-Hash: NE4JIRPECHNDZSWWM64OCNQFYM2XU5KE X-Message-ID-Hash: NE4JIRPECHNDZSWWM64OCNQFYM2XU5KE X-MailFrom: dgibson@gandalf.ozlabs.org X-Mailman-Rule-Misses: dmarc-mitigation; no-senders; approved; emergency; loop; banned-address; member-moderation; nonmember-moderation; administrivia; implicit-dest; max-recipients; max-size; news-moderation; no-subject; digests; suspicious-header CC: David Gibson X-Mailman-Version: 3.3.8 Precedence: list List-Id: Development discussion and patches for passt Archived-At: Archived-At: List-Archive: List-Archive: List-Help: List-Owner: List-Post: List-Subscribe: List-Unsubscribe: tcp_tap_conn has several fields to track addresses and ports as seen by the guest/namespace. However we now have general fields for this in the common flowside fields of struct flow. 
Use those instead of protocol-specific fields. So far we've only explicitly tracked the guest-side forwarding address in the TCP connection - the remote address from the guest's point of view. The tap-side endpoint address - the local address from the guest's point of view - was assumed to always be one of the handful of guest addresses we track as addr_seen (one each for IPv4, IPv6 global and IPv6 link-local). struct flowside expects both addresses, and we will want to use the endpoint address in the future. So, determine that address and store it as part of the flowside. Signed-off-by: David Gibson --- flow.c | 1 - tcp.c | 80 +++++++++++++++++++++++++++++------------------------- tcp_conn.h | 6 +--- 3 files changed, 44 insertions(+), 43 deletions(-) diff --git a/flow.c b/flow.c index a93cf8c..d7264f8 100644 --- a/flow.c +++ b/flow.c @@ -32,7 +32,6 @@ union flow flowtab[FLOW_MAX]; * * Return: pointer to formatted string describing @fs, or NULL on error */ -/* cppcheck-suppress unusedFunction */ const char *flowside_fmt(const struct flowside *fs, char *buf, size_t size) { char ebuf[INET6_ADDRSTRLEN], fbuf[INET6_ADDRSTRLEN]; diff --git a/tcp.c b/tcp.c index 722a613..16b930e 100644 --- a/tcp.c +++ b/tcp.c @@ -397,7 +397,9 @@ struct tcp6_l2_head { /* For MSS6 macro: keep in sync with tcp6_l2_buf_t */ #define OPT_SACK 5 #define OPT_TS 8 -#define CONN_V4(conn) (!!inany_v4(&(conn)->faddr)) +#define TAPSIDE(conn) (&(conn)->f.side[1]) + +#define CONN_V4(conn) (!!inany_v4(&TAPSIDE(conn)->faddr)) #define CONN_V6(conn) (!CONN_V4(conn)) #define CONN_IS_CLOSING(conn) \ ((conn->events & ESTABLISHED) && \ @@ -844,7 +846,7 @@ static int tcp_rtt_dst_low(const struct tcp_tap_conn *conn) int i; for (i = 0; i < LOW_RTT_TABLE_SIZE; i++) - if (inany_equals(&conn->faddr, low_rtt_dst + i)) + if (inany_equals(&TAPSIDE(conn)->faddr, low_rtt_dst + i)) return 1; return 0; @@ -866,7 +868,7 @@ static void tcp_rtt_dst_check(const struct tcp_tap_conn *conn, return; for (i = 0; i < LOW_RTT_TABLE_SIZE; 
i++) { - if (inany_equals(&conn->faddr, low_rtt_dst + i)) + if (inany_equals(&TAPSIDE(conn)->faddr, low_rtt_dst + i)) return; if (hole == -1 && IN6_IS_ADDR_UNSPECIFIED(low_rtt_dst + i)) hole = i; @@ -878,7 +880,7 @@ static void tcp_rtt_dst_check(const struct tcp_tap_conn *conn, if (hole == -1) return; - low_rtt_dst[hole++] = conn->faddr; + low_rtt_dst[hole++] = TAPSIDE(conn)->faddr; if (hole == LOW_RTT_TABLE_SIZE) hole = 0; inany_from_af(low_rtt_dst + hole, AF_INET6, &in6addr_any); @@ -1143,8 +1145,8 @@ static int tcp_hash_match(const struct tcp_tap_conn *conn, const union inany_addr *faddr, in_port_t eport, in_port_t fport) { - if (inany_equals(&conn->faddr, faddr) && - conn->eport == eport && conn->fport == fport) + if (inany_equals(&TAPSIDE(conn)->faddr, faddr) && + TAPSIDE(conn)->eport == eport && TAPSIDE(conn)->fport == fport) return 1; return 0; @@ -1186,7 +1188,8 @@ static unsigned int tcp_hash(const struct ctx *c, const union inany_addr *faddr, static unsigned int tcp_conn_hash(const struct ctx *c, const struct tcp_tap_conn *conn) { - return tcp_hash(c, &conn->faddr, conn->eport, conn->fport); + return tcp_hash(c, &TAPSIDE(conn)->faddr, + TAPSIDE(conn)->eport, TAPSIDE(conn)->fport); } /** @@ -1198,7 +1201,8 @@ static void tcp_hash_insert(const struct ctx *c, struct tcp_tap_conn *conn) { int b; - b = tcp_hash(c, &conn->faddr, conn->eport, conn->fport); + b = tcp_hash(c, &TAPSIDE(conn)->faddr, + TAPSIDE(conn)->eport, TAPSIDE(conn)->fport); conn->next_index = tc_hash[b] ? 
FLOW_IDX(tc_hash[b]) : -1U; tc_hash[b] = conn; @@ -1386,13 +1390,13 @@ static size_t tcp_l2_buf_fill_headers(const struct ctx *c, void *p, size_t plen, const uint16_t *check, uint32_t seq) { - const struct in_addr *a4 = inany_v4(&conn->faddr); + const struct in_addr *a4 = inany_v4(&TAPSIDE(conn)->faddr); size_t ip_len, tlen; #define SET_TCP_HEADER_COMMON_V4_V6(b, conn, seq) \ do { \ - b->th.source = htons(conn->fport); \ - b->th.dest = htons(conn->eport); \ + b->th.source = htons(TAPSIDE(conn)->fport); \ + b->th.dest = htons(TAPSIDE(conn)->eport); \ b->th.seq = htonl(seq); \ b->th.ack_seq = htonl(conn->seq_ack_to_tap); \ if (conn->events & ESTABLISHED) { \ @@ -1410,7 +1414,7 @@ do { \ ip_len = plen + sizeof(struct iphdr) + sizeof(struct tcphdr); b->iph.tot_len = htons(ip_len); b->iph.saddr = a4->s_addr; - b->iph.daddr = c->ip4.addr_seen.s_addr; + b->iph.daddr = inany_v4(&TAPSIDE(conn)->eaddr)->s_addr; if (check) b->iph.check = *check; @@ -1428,11 +1432,8 @@ do { \ ip_len = plen + sizeof(struct ipv6hdr) + sizeof(struct tcphdr); b->ip6h.payload_len = htons(plen + sizeof(struct tcphdr)); - b->ip6h.saddr = conn->faddr.a6; - if (IN6_IS_ADDR_LINKLOCAL(&b->ip6h.saddr)) - b->ip6h.daddr = c->ip6.addr_ll_seen; - else - b->ip6h.daddr = c->ip6.addr_seen; + b->ip6h.saddr = TAPSIDE(conn)->faddr.a6; + b->ip6h.daddr = TAPSIDE(conn)->eaddr.a6; memset(b->ip6h.flow_lbl, 0, 3); @@ -1781,31 +1782,25 @@ static void tcp_clamp_window(const struct ctx *c, struct tcp_tap_conn *conn, /** * tcp_seq_init() - Calculate initial sequence number according to RFC 6528 * @c: Execution context - * @conn: TCP connection, with faddr, fport and eport populated + * @conn: TCP connection, with faddr, fport, eaddr, eport populated * @now: Current timestamp */ static void tcp_seq_init(const struct ctx *c, struct tcp_tap_conn *conn, const struct timespec *now) { - union inany_addr aany; struct { union inany_addr src; in_port_t srcport; union inany_addr dst; in_port_t dstport; } __attribute__((__packed__)) in 
= { - .src = conn->faddr, - .srcport = conn->fport, - .dstport = conn->eport, + .src = TAPSIDE(conn)->faddr, + .srcport = TAPSIDE(conn)->fport, + .dst = TAPSIDE(conn)->eaddr, + .dstport = TAPSIDE(conn)->eport, }; uint32_t ns, seq = 0; - if (CONN_V4(conn)) - inany_from_af(&aany, AF_INET, &c->ip4.addr); - else - inany_from_af(&aany, AF_INET6, &c->ip6.addr); - in.dst = aany; - seq = siphash_36b((uint8_t *)&in, c->tcp.hash_secret); /* 32ns ticks, overflows 32 bits every 137s */ @@ -1967,13 +1962,12 @@ static void tcp_conn_from_tap(struct ctx *c, .sin6_port = th->dest, .sin6_addr = *(struct in6_addr *)daddr, }; + char fsstr[FLOWSIDE_STRLEN]; const struct sockaddr *sa; struct tcp_tap_conn *conn; socklen_t sl; int s, mss; - (void)saddr; - if (c->flow_count >= FLOW_MAX) return; @@ -2021,7 +2015,12 @@ static void tcp_conn_from_tap(struct ctx *c, if (!(conn->wnd_from_tap = (htons(th->window) >> conn->ws_from_tap))) conn->wnd_from_tap = 1; - inany_from_af(&conn->faddr, af, daddr); + flowside_from_af(TAPSIDE(conn), af, daddr, ntohs(th->dest), + saddr, ntohs(th->source)); + ASSERT(flowside_complete(TAPSIDE(conn))); + + debug("TCP: index %li, new connection from tap, %s", FLOW_IDX(conn), + flowside_fmt(TAPSIDE(conn), fsstr, sizeof(fsstr))); if (af == AF_INET) { sa = (struct sockaddr *)&addr4; @@ -2031,9 +2030,6 @@ static void tcp_conn_from_tap(struct ctx *c, sl = sizeof(addr6); } - conn->fport = ntohs(th->dest); - conn->eport = ntohs(th->source); - conn->seq_init_from_tap = ntohl(th->seq); conn->seq_from_tap = conn->seq_init_from_tap + 1; conn->seq_ack_to_tap = conn->seq_from_tap; @@ -2692,10 +2688,20 @@ static void tcp_tap_conn_from_sock(struct ctx *c, conn->ws_to_tap = conn->ws_from_tap = 0; conn_event(c, conn, SOCK_ACCEPTED); - inany_from_sockaddr(&conn->faddr, &conn->fport, sa); - conn->eport = ref.port; + inany_from_sockaddr(&TAPSIDE(conn)->faddr, &TAPSIDE(conn)->fport, sa); + tcp_snat_inbound(c, &TAPSIDE(conn)->faddr); + + if (CONN_V4(conn)) { + 
inany_from_af(&TAPSIDE(conn)->eaddr, AF_INET, &c->ip4.addr_seen); + } else { + if (IN6_IS_ADDR_LINKLOCAL(&TAPSIDE(conn)->faddr.a6)) + TAPSIDE(conn)->eaddr.a6 = c->ip6.addr_ll_seen; + else + TAPSIDE(conn)->eaddr.a6 = c->ip6.addr_seen; + } + TAPSIDE(conn)->eport = ref.port; - tcp_snat_inbound(c, &conn->faddr); + ASSERT(flowside_complete(TAPSIDE(conn))); tcp_seq_init(c, conn, now); tcp_hash_insert(c, conn); diff --git a/tcp_conn.h b/tcp_conn.h index 4e7c7fc..3482759 100644 --- a/tcp_conn.h +++ b/tcp_conn.h @@ -24,6 +24,7 @@ * @ws_to_tap: Window scaling factor advertised to tap/guest * @sndbuf: Sending buffer in kernel, rounded to 2 ^ SNDBUF_BITS * @seq_dup_ack_approx: Last duplicate ACK number sent to tap + * @eaddr: Guest side endpoint address (guest's local address) * @faddr: Guest side forwarding address (guest's remote address) * @eport: Guest side endpoint port (guest's local port) * @fport: Guest side forwarding port (guest's remote port) @@ -94,11 +95,6 @@ struct tcp_tap_conn { uint8_t seq_dup_ack_approx; - - union inany_addr faddr; - in_port_t eport; - in_port_t fport; - uint16_t wnd_from_tap; uint16_t wnd_to_tap; -- 2.41.0