From: David Gibson <david@gibson.dropbear.id.au>
To: Stefano Brivio <sbrivio@redhat.com>, passt-dev@passt.top
Cc: David Gibson <david@gibson.dropbear.id.au>
Subject: [PATCH 6/8] tcp: Track guest-side correspondent address
Date: Fri, 28 Jul 2023 19:48:29 +1000 [thread overview]
Message-ID: <20230728094831.4097571-7-david@gibson.dropbear.id.au> (raw)
In-Reply-To: <20230728094831.4097571-1-david@gibson.dropbear.id.au>
Currently the only address we explicitly track in the TCP connection
structure is the tap-side forwarding address - that is, the remote address
from the guest's point of view. The tap-side correspondent address - the
local address from the guest's point of view - is assumed to always be one
of the handful of guest addresses we track as addr_seen (one each for IPv4,
IPv6 global and IPv6 link-local).
We want to generalize our forwarding model to allow the guest to have
multiple addresses. As a first step, track the tap-side correspondent
address in the connection structure: connections initiated by the guest
record the source address the guest actually used, and one of the
addr_seen variables is consulted only when a new connection arrives from
the socket side.
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
---
tcp.c | 37 +++++++++++++++++--------------------
tcp_conn.h | 2 ++
2 files changed, 19 insertions(+), 20 deletions(-)
diff --git a/tcp.c b/tcp.c
index ac7ae60..6c4d71e 100644
--- a/tcp.c
+++ b/tcp.c
@@ -1449,7 +1449,7 @@ do { \
ip_len = plen + sizeof(struct iphdr) + sizeof(struct tcphdr);
b->iph.tot_len = htons(ip_len);
b->iph.saddr = a4->s_addr;
- b->iph.daddr = c->ip4.addr_seen.s_addr;
+ b->iph.daddr = inany_v4(&conn->caddr)->s_addr;
if (check)
b->iph.check = *check;
@@ -1468,10 +1468,7 @@ do { \
b->ip6h.payload_len = htons(plen + sizeof(struct tcphdr));
b->ip6h.saddr = conn->faddr.a6;
- if (IN6_IS_ADDR_LINKLOCAL(&b->ip6h.saddr))
- b->ip6h.daddr = c->ip6.addr_ll_seen;
- else
- b->ip6h.daddr = c->ip6.addr_seen;
+ b->ip6h.daddr = conn->caddr.a6;
memset(b->ip6h.flow_lbl, 0, 3);
@@ -1820,13 +1817,12 @@ static void tcp_clamp_window(const struct ctx *c, struct tcp_tap_conn *conn,
/**
* tcp_seq_init() - Calculate initial sequence number according to RFC 6528
* @c: Execution context
- * @conn: TCP connection, with faddr, fport and cport populated
+ * @conn: TCP connection, with faddr, fport, caddr, cport populated
* @now: Current timestamp
*/
static void tcp_seq_init(const struct ctx *c, struct tcp_tap_conn *conn,
const struct timespec *now)
{
- union inany_addr aany;
struct {
union inany_addr src;
in_port_t srcport;
@@ -1835,16 +1831,11 @@ static void tcp_seq_init(const struct ctx *c, struct tcp_tap_conn *conn,
} __attribute__((__packed__)) in = {
.src = conn->faddr,
.srcport = conn->cport,
+ .dst = conn->caddr,
.dstport = conn->fport,
};
uint32_t ns, seq = 0;
- if (CONN_V4(conn))
- inany_from_af(&aany, AF_INET, &c->ip4.addr);
- else
- inany_from_af(&aany, AF_INET6, &c->ip6.addr);
- in.dst = aany;
-
seq = siphash_36b((uint8_t *)&in, c->tcp.hash_secret);
/* 32ns ticks, overflows 32 bits every 137s */
@@ -2011,8 +2002,6 @@ static void tcp_conn_from_tap(struct ctx *c,
socklen_t sl;
int s, mss;
- (void)saddr;
-
if (c->tcp.conn_count >= TCP_MAX_CONNS)
return;
@@ -2061,6 +2050,9 @@ static void tcp_conn_from_tap(struct ctx *c,
conn->wnd_from_tap = 1;
inany_from_af(&conn->faddr, af, daddr);
+ inany_from_af(&conn->caddr, af, saddr);
+ conn->fport = ntohs(th->dest);
+ conn->cport = ntohs(th->source);
if (af == AF_INET) {
sa = (struct sockaddr *)&addr4;
@@ -2070,9 +2062,6 @@ static void tcp_conn_from_tap(struct ctx *c,
sl = sizeof(addr6);
}
- conn->fport = ntohs(th->dest);
- conn->cport = ntohs(th->source);
-
conn->seq_init_from_tap = ntohl(th->seq);
conn->seq_from_tap = conn->seq_init_from_tap + 1;
conn->seq_ack_to_tap = conn->seq_from_tap;
@@ -2731,10 +2720,18 @@ static void tcp_tap_conn_from_sock(struct ctx *c, union epoll_ref ref,
conn_event(c, conn, SOCK_ACCEPTED);
inany_from_sockaddr(&conn->faddr, &conn->fport, sa);
- conn->cport = ref.r.p.tcp.tcp.index;
-
tcp_snat_inbound(c, &conn->faddr);
+ if (CONN_V4(conn)) {
+ inany_from_af(&conn->caddr, AF_INET, &c->ip4.addr_seen);
+ } else {
+ if (IN6_IS_ADDR_LINKLOCAL(&conn->faddr.a6))
+ conn->caddr.a6 = c->ip6.addr_ll_seen;
+ else
+ conn->caddr.a6 = c->ip6.addr_seen;
+ }
+ conn->cport = ref.r.p.tcp.tcp.index;
+
tcp_seq_init(c, conn, now);
tcp_hash_insert(c, conn);
diff --git a/tcp_conn.h b/tcp_conn.h
index ba2a1ef..9151c18 100644
--- a/tcp_conn.h
+++ b/tcp_conn.h
@@ -35,6 +35,7 @@ extern const char *tcp_common_flag_str[];
* @ws_to_tap: Window scaling factor advertised to tap/guest
* @sndbuf: Sending buffer in kernel, rounded to 2 ^ SNDBUF_BITS
* @seq_dup_ack_approx: Last duplicate ACK number sent to tap
+ * @caddr: Guest side correspondent address (guest's local address)
* @faddr: Guest side forwarding address (guest's remote address)
* @cport: Guest side correspondent port (guest's local port)
* @fport: Guest side forwarding port (guest's remote port)
@@ -105,6 +106,7 @@ struct tcp_tap_conn {
uint8_t seq_dup_ack_approx;
+ union inany_addr caddr;
union inany_addr faddr;
in_port_t cport;
in_port_t fport;
--
@@ -35,6 +35,7 @@ extern const char *tcp_common_flag_str[];
* @ws_to_tap: Window scaling factor advertised to tap/guest
* @sndbuf: Sending buffer in kernel, rounded to 2 ^ SNDBUF_BITS
* @seq_dup_ack_approx: Last duplicate ACK number sent to tap
+ * @caddr: Guest side correspondent address (guest's local address)
* @faddr: Guest side forwarding address (guest's remote address)
* @cport: Guest side correspondent port (guest's local port)
* @fport: Guest side forwarding port (guest's remote port)
@@ -105,6 +106,7 @@ struct tcp_tap_conn {
uint8_t seq_dup_ack_approx;
+ union inany_addr caddr;
union inany_addr faddr;
in_port_t cport;
in_port_t fport;
--
2.41.0
next prev parent reply other threads:[~2023-07-28 9:48 UTC|newest]
Thread overview: 9+ messages / expand[flat|nested] mbox.gz Atom feed top
2023-07-28 9:48 [PATCH 0/8] RFC: Generalize flow tracking, part 1 David Gibson
2023-07-28 9:48 ` [PATCH 1/8] tap: Don't clobber source address in tap6_handler() David Gibson
2023-07-28 9:48 ` [PATCH 2/8] tap: Pass source address to protocol handler functions David Gibson
2023-07-28 9:48 ` [PATCH 3/8] tcp: More precise terms for addresses and ports David Gibson
2023-07-28 9:48 ` [PATCH 4/8] tcp, udp: Don't include destination address in partially precomputed csums David Gibson
2023-07-28 9:48 ` [PATCH 5/8] tcp, udp: Don't pre-fill IPv4 destination address in headers David Gibson
2023-07-28 9:48 ` David Gibson [this message]
2023-07-28 9:48 ` [PATCH 7/8] tcp, flow: Introduce struct demiflow David Gibson
2023-07-28 9:48 ` [PATCH 8/8] tcp, flow: Perform TCP hash calculations based on demiflow structure David Gibson
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20230728094831.4097571-7-david@gibson.dropbear.id.au \
--to=david@gibson.dropbear.id.au \
--cc=passt-dev@passt.top \
--cc=sbrivio@redhat.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
Code repositories for project(s) associated with this public inbox
https://passt.top/passt
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for IMAP folder(s).