From: David Gibson <david@gibson.dropbear.id.au>
To: Stefano Brivio <sbrivio@redhat.com>, passt-dev@passt.top
Cc: David Gibson <david@gibson.dropbear.id.au>
Subject: [PATCH 7/8] tcp, flow: Introduce struct demiflow
Date: Fri, 28 Jul 2023 19:48:30 +1000 [thread overview]
Message-ID: <20230728094831.4097571-8-david@gibson.dropbear.id.au> (raw)
In-Reply-To: <20230728094831.4097571-1-david@gibson.dropbear.id.au>
For TCP tap connections we keep track of both the IP address and port for
each side of a connection as seen by the guest. We're planning to track
similar information in a number of other places as well.
To assist with this, create a new structure, struct demiflow, which tracks
both endpoints (address and port) of a connection or other logical packet
flow as seen from a single "side" of passt. Also add a small helper function
for initializing this structure.
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
---
flow.h | 41 ++++++++++++++++++++++++++++++++++++
tcp.c | 59 ++++++++++++++++++++++++++--------------------------
tcp_conn.h | 11 ++--------
tcp_splice.c | 1 +
4 files changed, 74 insertions(+), 38 deletions(-)
create mode 100644 flow.h
diff --git a/flow.h b/flow.h
new file mode 100644
index 0000000..f7c0981
--- /dev/null
+++ b/flow.h
@@ -0,0 +1,41 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later
+ * Copyright Red Hat
+ * Author: David Gibson <david@gibson.dropbear.id.au>
+ *
+ * Tracking for logical "flows" of packets.
+ */
+#ifndef FLOW_H
+#define FLOW_H
+
+/**
+ * struct demiflow - Describes a logical packet flow as seen from one "side"
+ * @faddr: Forwarding address (local address from passt's PoV)
+ * @caddr: Correspondent address (remote address from passt's PoV)
+ * @fport: Forwarding port (port paired with @faddr)
+ * @cport: Correspondent port (port paired with @caddr)
+ */
+struct demiflow {
+ union inany_addr faddr;
+ union inany_addr caddr;
+ in_port_t fport, cport;
+};
+
+/** demiflow_from_af - Initialize a demiflow from raw addresses and ports
+ * @df: demiflow to initialize (all four members are overwritten)
+ * @af: Address family selecting how @faddr and @caddr are interpreted
+ * @faddr: Forwarding address (pointer to in_addr or in6_addr, per @af)
+ * @fport: Forwarding port (the TCP caller passes it ntohs()-converted)
+ * @caddr: Correspondent address (pointer to in_addr or in6_addr, per @af)
+ * @cport: Correspondent port (the TCP caller passes it ntohs()-converted)
+ */
+static inline void demiflow_from_af(struct demiflow *df, int af,
+ const void *faddr, in_port_t fport,
+ const void *caddr, in_port_t cport)
+{
+ inany_from_af(&df->faddr, af, faddr);
+ inany_from_af(&df->caddr, af, caddr);
+ df->fport = fport;
+ df->cport = cport;
+}
+
+#endif /* FLOW_H */
diff --git a/tcp.c b/tcp.c
index 6c4d71e..c1875c3 100644
--- a/tcp.c
+++ b/tcp.c
@@ -302,6 +302,7 @@
#include "tcp_splice.h"
#include "log.h"
#include "inany.h"
+#include "flow.h"
#include "tcp_conn.h"
@@ -399,7 +400,7 @@ struct tcp6_l2_head { /* For MSS6 macro: keep in sync with tcp6_l2_buf_t */
#define OPT_SACK 5
#define OPT_TS 8
-#define CONN_V4(conn) (!!inany_v4(&(conn)->faddr))
+#define CONN_V4(conn) (!!inany_v4(&(conn)->tapflow.faddr))
#define CONN_V6(conn) (!CONN_V4(conn))
#define CONN_IS_CLOSING(conn) \
((conn->events & ESTABLISHED) && \
@@ -852,7 +853,7 @@ static int tcp_rtt_dst_low(const struct tcp_tap_conn *conn)
int i;
for (i = 0; i < LOW_RTT_TABLE_SIZE; i++)
- if (inany_equals(&conn->faddr, low_rtt_dst + i))
+ if (inany_equals(&conn->tapflow.faddr, low_rtt_dst + i))
return 1;
return 0;
@@ -874,7 +875,7 @@ static void tcp_rtt_dst_check(const struct tcp_tap_conn *conn,
return;
for (i = 0; i < LOW_RTT_TABLE_SIZE; i++) {
- if (inany_equals(&conn->faddr, low_rtt_dst + i))
+ if (inany_equals(&conn->tapflow.faddr, low_rtt_dst + i))
return;
if (hole == -1 && IN6_IS_ADDR_UNSPECIFIED(low_rtt_dst + i))
hole = i;
@@ -886,7 +887,7 @@ static void tcp_rtt_dst_check(const struct tcp_tap_conn *conn,
if (hole == -1)
return;
- low_rtt_dst[hole++] = conn->faddr;
+ low_rtt_dst[hole++] = conn->tapflow.faddr;
if (hole == LOW_RTT_TABLE_SIZE)
hole = 0;
inany_from_af(low_rtt_dst + hole, AF_INET6, &in6addr_any);
@@ -1151,8 +1152,8 @@ static int tcp_hash_match(const struct tcp_tap_conn *conn,
const union inany_addr *faddr,
in_port_t cport, in_port_t fport)
{
- if (inany_equals(&conn->faddr, faddr) &&
- conn->cport == cport && conn->fport == fport)
+ if (inany_equals(&conn->tapflow.faddr, faddr) &&
+ conn->tapflow.cport == cport && conn->tapflow.fport == fport)
return 1;
return 0;
@@ -1194,7 +1195,8 @@ static unsigned int tcp_hash(const struct ctx *c, const union inany_addr *faddr,
static unsigned int tcp_conn_hash(const struct ctx *c,
const struct tcp_tap_conn *conn)
{
- return tcp_hash(c, &conn->faddr, conn->cport, conn->fport);
+ return tcp_hash(c, &conn->tapflow.faddr,
+ conn->tapflow.cport, conn->tapflow.fport);
}
/**
@@ -1206,7 +1208,8 @@ static void tcp_hash_insert(const struct ctx *c, struct tcp_tap_conn *conn)
{
int b;
- b = tcp_hash(c, &conn->faddr, conn->cport, conn->fport);
+ b = tcp_hash(c, &conn->tapflow.faddr,
+ conn->tapflow.cport, conn->tapflow.fport);
conn->next_index = tc_hash[b] ? CONN_IDX(tc_hash[b]) : -1;
tc_hash[b] = conn;
@@ -1425,13 +1428,13 @@ static size_t tcp_l2_buf_fill_headers(const struct ctx *c,
void *p, size_t plen,
const uint16_t *check, uint32_t seq)
{
- const struct in_addr *a4 = inany_v4(&conn->faddr);
+ const struct in_addr *a4 = inany_v4(&conn->tapflow.faddr);
size_t ip_len, tlen;
#define SET_TCP_HEADER_COMMON_V4_V6(b, conn, seq) \
do { \
- b->th.source = htons(conn->fport); \
- b->th.dest = htons(conn->cport); \
+ b->th.source = htons(conn->tapflow.fport); \
+ b->th.dest = htons(conn->tapflow.cport); \
b->th.seq = htonl(seq); \
b->th.ack_seq = htonl(conn->seq_ack_to_tap); \
if (conn->events & ESTABLISHED) { \
@@ -1449,7 +1452,7 @@ do { \
ip_len = plen + sizeof(struct iphdr) + sizeof(struct tcphdr);
b->iph.tot_len = htons(ip_len);
b->iph.saddr = a4->s_addr;
- b->iph.daddr = inany_v4(&conn->caddr)->s_addr;
+ b->iph.daddr = inany_v4(&conn->tapflow.caddr)->s_addr;
if (check)
b->iph.check = *check;
@@ -1467,8 +1470,8 @@ do { \
ip_len = plen + sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
b->ip6h.payload_len = htons(plen + sizeof(struct tcphdr));
- b->ip6h.saddr = conn->faddr.a6;
- b->ip6h.daddr = conn->caddr.a6;
+ b->ip6h.saddr = conn->tapflow.faddr.a6;
+ b->ip6h.daddr = conn->tapflow.caddr.a6;
memset(b->ip6h.flow_lbl, 0, 3);
@@ -1829,10 +1832,10 @@ static void tcp_seq_init(const struct ctx *c, struct tcp_tap_conn *conn,
union inany_addr dst;
in_port_t dstport;
} __attribute__((__packed__)) in = {
- .src = conn->faddr,
- .srcport = conn->cport,
- .dst = conn->caddr,
- .dstport = conn->fport,
+ .src = conn->tapflow.faddr,
+ .srcport = conn->tapflow.cport,
+ .dst = conn->tapflow.caddr,
+ .dstport = conn->tapflow.fport,
};
uint32_t ns, seq = 0;
@@ -2049,10 +2052,8 @@ static void tcp_conn_from_tap(struct ctx *c,
if (!(conn->wnd_from_tap = (htons(th->window) >> conn->ws_from_tap)))
conn->wnd_from_tap = 1;
- inany_from_af(&conn->faddr, af, daddr);
- inany_from_af(&conn->caddr, af, saddr);
- conn->fport = ntohs(th->dest);
- conn->cport = ntohs(th->source);
+ demiflow_from_af(&conn->tapflow, af, daddr, ntohs(th->dest),
+ saddr, ntohs(th->source));
if (af == AF_INET) {
sa = (struct sockaddr *)&addr4;
@@ -2719,18 +2720,18 @@ static void tcp_tap_conn_from_sock(struct ctx *c, union epoll_ref ref,
conn->ws_to_tap = conn->ws_from_tap = 0;
conn_event(c, conn, SOCK_ACCEPTED);
- inany_from_sockaddr(&conn->faddr, &conn->fport, sa);
- tcp_snat_inbound(c, &conn->faddr);
+ inany_from_sockaddr(&conn->tapflow.faddr, &conn->tapflow.fport, sa);
+ tcp_snat_inbound(c, &conn->tapflow.faddr);
if (CONN_V4(conn)) {
- inany_from_af(&conn->caddr, AF_INET, &c->ip4.addr_seen);
+ inany_from_af(&conn->tapflow.caddr, AF_INET, &c->ip4.addr_seen);
} else {
- if (IN6_IS_ADDR_LINKLOCAL(&conn->faddr.a6))
- conn->caddr.a6 = c->ip6.addr_ll_seen;
+ if (IN6_IS_ADDR_LINKLOCAL(&conn->tapflow.faddr.a6))
+ conn->tapflow.caddr.a6 = c->ip6.addr_ll_seen;
else
- conn->caddr.a6 = c->ip6.addr_seen;
+ conn->tapflow.caddr.a6 = c->ip6.addr_seen;
}
- conn->cport = ref.r.p.tcp.tcp.index;
+ conn->tapflow.cport = ref.r.p.tcp.tcp.index;
tcp_seq_init(c, conn, now);
tcp_hash_insert(c, conn);
diff --git a/tcp_conn.h b/tcp_conn.h
index 9151c18..92d4637 100644
--- a/tcp_conn.h
+++ b/tcp_conn.h
@@ -35,10 +35,7 @@ extern const char *tcp_common_flag_str[];
* @ws_to_tap: Window scaling factor advertised to tap/guest
* @sndbuf: Sending buffer in kernel, rounded to 2 ^ SNDBUF_BITS
* @seq_dup_ack_approx: Last duplicate ACK number sent to tap
- * @caddr: Guest side correspondent address (guest's local address)
- * @faddr: Guest side forwarding address (guest's remote address)
- * @cport: Guest side correspondent port (guest's local port)
- * @fport: Guest side forwarding port (guest's remote port)
+ * @tapflow: Tap(guest)-side demiflow
* @wnd_from_tap: Last window size from tap, unscaled (as received)
* @wnd_to_tap: Sending window advertised to tap, unscaled (as sent)
* @seq_to_tap: Next sequence for packets to tap
@@ -105,11 +102,7 @@ struct tcp_tap_conn {
uint8_t seq_dup_ack_approx;
-
- union inany_addr caddr;
- union inany_addr faddr;
- in_port_t cport;
- in_port_t fport;
+ struct demiflow tapflow;
uint16_t wnd_from_tap;
uint16_t wnd_to_tap;
diff --git a/tcp_splice.c b/tcp_splice.c
index 71256b0..a1aeff7 100644
--- a/tcp_splice.c
+++ b/tcp_splice.c
@@ -53,6 +53,7 @@
#include "log.h"
#include "tcp_splice.h"
#include "inany.h"
+#include "flow.h"
#include "tcp_conn.h"
--
2.41.0
next prev parent reply other threads:[~2023-07-28 9:48 UTC|newest]
Thread overview: 9+ messages / expand[flat|nested] mbox.gz Atom feed top
2023-07-28 9:48 [PATCH 0/8] RFC: Generalize flow tracking, part 1 David Gibson
2023-07-28 9:48 ` [PATCH 1/8] tap: Don't clobber source address in tap6_handler() David Gibson
2023-07-28 9:48 ` [PATCH 2/8] tap: Pass source address to protocol handler functions David Gibson
2023-07-28 9:48 ` [PATCH 3/8] tcp: More precise terms for addresses and ports David Gibson
2023-07-28 9:48 ` [PATCH 4/8] tcp, udp: Don't include destination address in partially precomputed csums David Gibson
2023-07-28 9:48 ` [PATCH 5/8] tcp, udp: Don't pre-fill IPv4 destination address in headers David Gibson
2023-07-28 9:48 ` [PATCH 6/8] tcp: Track guest-side correspondent address David Gibson
2023-07-28 9:48 ` David Gibson [this message]
2023-07-28 9:48 ` [PATCH 8/8] tcp, flow: Perform TCP hash calculations based on demiflow structure David Gibson
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20230728094831.4097571-8-david@gibson.dropbear.id.au \
--to=david@gibson.dropbear.id.au \
--cc=passt-dev@passt.top \
--cc=sbrivio@redhat.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
Code repositories for project(s) associated with this public inbox
https://passt.top/passt
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for IMAP folder(s).