public inbox for passt-dev@passt.top
 help / color / mirror / code / Atom feed
From: David Gibson <david@gibson.dropbear.id.au>
To: Stefano Brivio <sbrivio@redhat.com>, passt-dev@passt.top
Cc: David Gibson <david@gibson.dropbear.id.au>
Subject: [PATCH v5 13/19] flow, tcp: Generalise TCP hash table to general flow hash table
Date: Tue, 14 May 2024 11:03:31 +1000	[thread overview]
Message-ID: <20240514010337.1104606-14-david@gibson.dropbear.id.au> (raw)
In-Reply-To: <20240514010337.1104606-1-david@gibson.dropbear.id.au>

Move the data structures and helper functions for the TCP hash table to
flow.c, making it a general hash table indexing sides of flows.  This is
largely code motion and straightforward renames.  There are two semantic
changes:

 * flow_lookup_af() now needs to verify that the entry has a matching
   protocol as well as matching addresses, ports and interface

 * We double the size of the hash table, because it's now at least
   theoretically possible for both sides of each flow to be hashed.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
---
 flow.c       | 146 ++++++++++++++++++++++++++++++++++++++++++++++++-
 flow.h       |   7 +++
 flow_table.h |   3 --
 tcp.c        | 149 +++++----------------------------------------------
 4 files changed, 165 insertions(+), 140 deletions(-)

diff --git a/flow.c b/flow.c
index fdd22b7..30a6904 100644
--- a/flow.c
+++ b/flow.c
@@ -108,6 +108,16 @@ static const union flow *flow_new_entry; /* = NULL */
 /* Last time the flow timers ran */
 static struct timespec flow_timer_run;
 
+/* Hash table sizing: target load factor and resulting number of buckets */
+#define FLOW_HASH_LOAD		70		/* % */
+#define FLOW_HASH_SIZE		((2 * FLOW_MAX * 100 / FLOW_HASH_LOAD))
+
+/* Table for lookup from flowside information */
+static flow_sidx_t flow_hashtab[FLOW_HASH_SIZE];
+
+static_assert(ARRAY_SIZE(flow_hashtab) >= 2 * FLOW_MAX,
+"Safe linear probing requires hash table with more entries than the number of sides in the flow table");
+
 /** flowside_from_af() - Initialise flowside from addresses
  * @fside:	flowside to initialise
  * @af:		Address family (AF_INET or AF_INET6)
@@ -406,8 +416,8 @@ void flow_alloc_cancel(union flow *flow)
  *
  * Return: hash value
  */
-uint64_t flow_hash(const struct ctx *c, uint8_t proto, uint8_t pif,
-		   const struct flowside *fside)
+static uint64_t flow_hash(const struct ctx *c, uint8_t proto, uint8_t pif,
+			  const struct flowside *fside)
 {
 	struct siphash_state state = SIPHASH_INIT(c->hash_secret);
 
@@ -426,6 +436,133 @@ uint64_t flow_hash(const struct ctx *c, uint8_t proto, uint8_t pif,
 			     fside->fport << 16 | fside->eport);
 }
 
+/**
+ * flow_sidx_hash() - Hash the flowside selected by a flow & side index
+ * @c:		Execution context
+ * @sidx:	Flow & side index to get hash for
+ *
+ * Return: hash value of the flow & side represented by @sidx
+ */
+static uint64_t flow_sidx_hash(const struct ctx *c, flow_sidx_t sidx)
+{
+	const struct flow_common *common = &flow_at_sidx(sidx)->f;
+	const struct flowside *fside = &common->side[sidx.side];
+	return flow_hash(c, FLOW_PROTO(common), common->pif[sidx.side], fside);
+}
+
+/**
+ * flow_hash_probe() - Locate the hash bucket belonging to one side of a flow
+ * @c:		Execution context
+ * @sidx:	Flow and side to find bucket for
+ *
+ * Return: the bucket currently holding @sidx if it is in the hash table,
+ *         otherwise the first free bucket found while probing for it.
+ */
+static inline unsigned flow_hash_probe(const struct ctx *c, flow_sidx_t sidx)
+{
+	unsigned bucket = flow_sidx_hash(c, sidx) % FLOW_HASH_SIZE;
+
+	/* Linear probing: keep stepping until we hit @sidx or a free bucket */
+	while (!(flow_sidx_eq(flow_hashtab[bucket], FLOW_SIDX_NONE) ||
+		 flow_sidx_eq(flow_hashtab[bucket], sidx)))
+		bucket = mod_sub(bucket, 1, FLOW_HASH_SIZE);
+
+	return bucket;
+}
+
+/**
+ * flow_hash_insert() - Insert one side of a flow into the hash table
+ * @c:		Execution context
+ * @sidx:	Flow & side index
+ */
+void flow_hash_insert(const struct ctx *c, flow_sidx_t sidx)
+{
+	unsigned bucket = flow_hash_probe(c, sidx);
+
+	flow_hashtab[bucket] = sidx;
+	flow_dbg(flow_at_sidx(sidx), "hash table insert: bucket: %u", bucket);
+}
+
+/**
+ * flow_hash_remove() - Take one side of a flow out of the hash table
+ * @c:		Execution context
+ * @sidx:	Side of flow to remove
+ */
+void flow_hash_remove(const struct ctx *c, flow_sidx_t sidx)
+{
+	unsigned gap = flow_hash_probe(c, sidx), cur, home;
+
+	if (flow_sidx_eq(flow_hashtab[gap], FLOW_SIDX_NONE))
+		return; /* Redundant remove */
+
+	flow_dbg(flow_at_sidx(sidx), "hash table remove: bucket: %u", gap);
+
+	/* Scan the remainder of the cluster, moving entries into the gap */
+	for (cur = mod_sub(gap, 1, FLOW_HASH_SIZE);
+	     !flow_sidx_eq(flow_hashtab[cur], FLOW_SIDX_NONE);
+	     cur = mod_sub(cur, 1, FLOW_HASH_SIZE)) {
+		home = flow_sidx_hash(c, flow_hashtab[cur]) % FLOW_HASH_SIZE;
+		if (mod_between(home, cur, gap, FLOW_HASH_SIZE))
+			continue;
+
+		/* flow_hashtab[cur] can live in flow_hashtab[gap]'s slot */
+		debug("hash table remove: shuffle %u -> %u", cur, gap);
+		flow_hashtab[gap] = flow_hashtab[cur];
+		gap = cur;
+	}
+
+	flow_hashtab[gap] = FLOW_SIDX_NONE;
+}
+
+/**
+ * flowside_lookup() - Look for a matching flowside in the flow table
+ * @c:		Execution context
+ * @proto:	Protocol of the flow (IP L4 protocol number)
+ * @pif:	pif to look for in the table
+ * @fside:	Flowside to look for in the table
+ *
+ * Return: sidx of the matching flow & side, FLOW_SIDX_NONE if not found
+ */
+static flow_sidx_t flowside_lookup(const struct ctx *c, uint8_t proto,
+				   uint8_t pif, const struct flowside *fside)
+{
+	union flow *flow;
+	unsigned b;
+
+	b = flow_hash(c, proto, pif, fside) % FLOW_HASH_SIZE;
+	while ((flow = flow_at_sidx(flow_hashtab[b])) &&
+	       !(FLOW_PROTO(&flow->f) == proto &&
+		 flow->f.pif[flow_hashtab[b].side] == pif &&
+		 flowside_eq(&flow->f.side[flow_hashtab[b].side], fside)))
+		b = mod_sub(b, 1, FLOW_HASH_SIZE); /* as flow_hash_probe() */
+
+	return flow_hashtab[b];
+}
+
+/**
+ * flow_lookup_af() - Find the flow & side matching given addressing details
+ * @c:		Execution context
+ * @proto:	Protocol of the flow (IP L4 protocol number)
+ * @pif:	Interface of the flow
+ * @af:		Address family, AF_INET or AF_INET6
+ * @eaddr:	Guest side endpoint address (guest local address)
+ * @faddr:	Guest side forwarding address (guest remote address)
+ * @eport:	Guest side endpoint port (guest local port)
+ * @fport:	Guest side forwarding port (guest remote port)
+ *
+ * Return: sidx of the matching flow & side, FLOW_SIDX_NONE if not found
+ */
+flow_sidx_t flow_lookup_af(const struct ctx *c,
+			   uint8_t proto, uint8_t pif, sa_family_t af,
+			   const void *eaddr, const void *faddr,
+			   in_port_t eport, in_port_t fport)
+{
+	struct flowside key;
+
+	flowside_from_af(&key, af, eaddr, eport, faddr, fport);
+	return flowside_lookup(c, proto, pif, &key);
+}
+
 /**
  * flow_defer_handler() - Handler for per-flow deferred and timed tasks
  * @c:		Execution context
@@ -535,7 +672,12 @@ void flow_defer_handler(const struct ctx *c, const struct timespec *now)
  */
 void flow_init(void)
 {
+	unsigned b;
+
 	/* Initial state is a single free cluster containing the whole table */
 	flowtab[0].free.n = FLOW_MAX;
 	flowtab[0].free.next = FLOW_MAX;
+
+	for (b = 0; b < FLOW_HASH_SIZE; b++)
+		flow_hashtab[b] = FLOW_SIDX_NONE;
 }
diff --git a/flow.h b/flow.h
index 6d68e09..0ba00da 100644
--- a/flow.h
+++ b/flow.h
@@ -211,6 +211,13 @@ static inline bool flow_sidx_eq(flow_sidx_t a, flow_sidx_t b)
 	return (a.flow == b.flow) && (a.side == b.side);
 }
 
+void flow_hash_insert(const struct ctx *c, flow_sidx_t sidx);
+void flow_hash_remove(const struct ctx *c, flow_sidx_t sidx);
+flow_sidx_t flow_lookup_af(const struct ctx *c,
+			   uint8_t proto, uint8_t pif, sa_family_t af,
+			   const void *eaddr, const void *faddr,
+			   in_port_t eport, in_port_t fport);
+
 union flow;
 
 void flow_init(void);
diff --git a/flow_table.h b/flow_table.h
index 0083c87..d17ffba 100644
--- a/flow_table.h
+++ b/flow_table.h
@@ -126,7 +126,4 @@ void flow_activate(struct flow_common *f);
 #define FLOW_ACTIVATE(flow_)			\
 	(flow_activate(&(flow_)->f))
 
-uint64_t flow_hash(const struct ctx *c, uint8_t proto, uint8_t pif,
-		   const struct flowside *fside);
-
 #endif /* FLOW_TABLE_H */
diff --git a/tcp.c b/tcp.c
index 983a537..8ab8c4d 100644
--- a/tcp.c
+++ b/tcp.c
@@ -307,9 +307,6 @@
 #define TCP_FRAMES							\
 	(c->mode == MODE_PASST ? TCP_FRAMES_MEM : 1)
 
-#define TCP_HASH_TABLE_LOAD		70		/* % */
-#define TCP_HASH_TABLE_SIZE		(FLOW_MAX * 100 / TCP_HASH_TABLE_LOAD)
-
 #define MAX_WS				8
 #define MAX_WINDOW			(1 << (16 + (MAX_WS)))
 
@@ -370,6 +367,7 @@
 
 #define TAPSIDE(conn_)	((conn_)->f.pif[1] == PIF_TAP)
 #define TAPFLOW(conn_)	(&((conn_)->f.side[TAPSIDE(conn_)]))
+#define TAP_SIDX(conn_)	(FLOW_SIDX((conn_), TAPSIDE(conn_)))
 
 #define CONN_V4(conn)		(!!inany_v4(&TAPFLOW(conn)->faddr))
 #define CONN_V6(conn)		(!CONN_V4(conn))
@@ -523,12 +521,6 @@ static struct iovec	tcp_iov			[UIO_MAXIOV];
 
 #define CONN(idx)		(&(FLOW(idx)->tcp))
 
-/* Table for lookup from flowside information */
-static flow_sidx_t tc_hash[TCP_HASH_TABLE_SIZE];
-
-static_assert(ARRAY_SIZE(tc_hash) >= FLOW_MAX,
-	"Safe linear probing requires hash table larger than connection table");
-
 /* Pools for pre-opened sockets (in init) */
 int init_sock_pool4		[TCP_SOCK_POOL_SIZE];
 int init_sock_pool6		[TCP_SOCK_POOL_SIZE];
@@ -722,9 +714,6 @@ static void conn_flag_do(const struct ctx *c, struct tcp_tap_conn *conn,
 		tcp_timer_ctl(c, conn);
 }
 
-static void tcp_hash_remove(const struct ctx *c,
-			    const struct tcp_tap_conn *conn);
-
 /**
  * conn_event_do() - Set and log connection events, update epoll state
  * @c:		Execution context
@@ -770,7 +759,7 @@ static void conn_event_do(const struct ctx *c, struct tcp_tap_conn *conn,
 			 num == -1 	       ? "CLOSED" : tcp_event_str[num]);
 
 	if (event == CLOSED)
-		tcp_hash_remove(c, conn);
+		flow_hash_remove(c, TAP_SIDX(conn));
 	else if ((event == TAP_FIN_RCVD) && !(conn->events & SOCK_FIN_RCVD))
 		conn_flag(c, conn, ACTIVE_CLOSE);
 	else
@@ -1073,118 +1062,6 @@ static int tcp_opt_get(const char *opts, size_t len, uint8_t type_find,
 	return -1;
 }
 
-/**
- * tcp_conn_hash() - Calculate hash bucket of an existing connection
- * @c:		Execution context
- * @conn:	Connection
- *
- * Return: hash value, needs to be adjusted for table size
- */
-static uint64_t tcp_conn_hash(const struct ctx *c,
-			      const struct tcp_tap_conn *conn)
-{
-	const struct flowside *tapside = TAPFLOW(conn);
-
-	return flow_hash(c, IPPROTO_TCP, conn->f.pif[TAPSIDE(conn)], tapside);
-}
-
-/**
- * tcp_hash_probe() - Find hash bucket for a connection
- * @c:		Execution context
- * @conn:	Connection to find bucket for
- *
- * Return: If @conn is in the table, its current bucket, otherwise a suitable
- *         free bucket for it.
- */
-static inline unsigned tcp_hash_probe(const struct ctx *c,
-				      const struct tcp_tap_conn *conn)
-{
-	unsigned b = tcp_conn_hash(c, conn) % TCP_HASH_TABLE_SIZE;
-	flow_sidx_t sidx = FLOW_SIDX(conn, TAPSIDE(conn));
-
-	/* Linear probing */
-	while (!flow_sidx_eq(tc_hash[b], FLOW_SIDX_NONE) &&
-	       !flow_sidx_eq(tc_hash[b], sidx))
-		b = mod_sub(b, 1, TCP_HASH_TABLE_SIZE);
-
-	return b;
-}
-
-/**
- * tcp_hash_insert() - Insert connection into hash table, chain link
- * @c:		Execution context
- * @conn:	Connection pointer
- */
-static void tcp_hash_insert(const struct ctx *c, struct tcp_tap_conn *conn)
-{
-	unsigned b = tcp_hash_probe(c, conn);
-
-	tc_hash[b] = FLOW_SIDX(conn, TAPSIDE(conn));
-	flow_dbg(conn, "hash table insert: sock %i, bucket: %u", conn->sock, b);
-}
-
-/**
- * tcp_hash_remove() - Drop connection from hash table, chain unlink
- * @c:		Execution context
- * @conn:	Connection pointer
- */
-static void tcp_hash_remove(const struct ctx *c,
-			    const struct tcp_tap_conn *conn)
-{
-	unsigned b = tcp_hash_probe(c, conn), s;
-	union flow *flow = flow_at_sidx(tc_hash[b]);
-
-	if (!flow)
-		return; /* Redundant remove */
-
-	flow_dbg(conn, "hash table remove: sock %i, bucket: %u", conn->sock, b);
-
-	/* Scan the remainder of the cluster */
-	for (s = mod_sub(b, 1, TCP_HASH_TABLE_SIZE);
-	     (flow = flow_at_sidx(tc_hash[s]));
-	     s = mod_sub(s, 1, TCP_HASH_TABLE_SIZE)) {
-		unsigned h = tcp_conn_hash(c, &flow->tcp) % TCP_HASH_TABLE_SIZE;
-
-		if (!mod_between(h, s, b, TCP_HASH_TABLE_SIZE)) {
-			/* tc_hash[s] can live in tc_hash[b]'s slot */
-			debug("hash table remove: shuffle %u -> %u", s, b);
-			tc_hash[b] = tc_hash[s];
-			b = s;
-		}
-	}
-
-	tc_hash[b] = FLOW_SIDX_NONE;
-}
-
-/**
- * tcp_hash_lookup() - Look up connection given remote address and ports
- * @c:		Execution context
- * @af:		Address family, AF_INET or AF_INET6
- * @eaddr:	Guest side endpoint address (guest local address)
- * @faddr:	Guest side forwarding address (guest remote address)
- * @eport:	Guest side endpoint port (guest local port)
- * @fport:	Guest side forwarding port (guest remote port)
- *
- * Return: connection pointer, if found, -ENOENT otherwise
- */
-static struct tcp_tap_conn *tcp_hash_lookup(const struct ctx *c, sa_family_t af,
-					    const void *eaddr, const void *faddr,
-					    in_port_t eport, in_port_t fport)
-{
-	struct flowside fside;
-	union flow *flow;
-	unsigned b;
-
-	flowside_from_af(&fside, af, eaddr, eport, faddr, fport);
-
-	b = flow_hash(c, IPPROTO_TCP, PIF_TAP, &fside) % TCP_HASH_TABLE_SIZE;
-	while ((flow = flow_at_sidx(tc_hash[b])) &&
-	       !flowside_eq(&flow->f.side[TAPSIDE(flow)], &fside))
-		b = mod_sub(b, 1, TCP_HASH_TABLE_SIZE);
-
-	return &flow->tcp;
-}
-
 /**
  * tcp_flow_defer() - Deferred per-flow handling (clean up closed connections)
  * @flow:	Flow table entry for this connection
@@ -1972,7 +1849,7 @@ static void tcp_conn_from_tap(struct ctx *c, sa_family_t af,
 	tcp_seq_init(c, conn, now);
 	conn->seq_ack_from_tap = conn->seq_to_tap;
 
-	tcp_hash_insert(c, conn);
+	flow_hash_insert(c, TAP_SIDX(conn));
 
 	sockaddr_from_inany(&sa, &sl, &fwd->eaddr, fwd->eport, c->ifi6);
 
@@ -2468,6 +2345,8 @@ int tcp_tap_handler(struct ctx *c, uint8_t pif, sa_family_t af,
 	const struct tcphdr *th;
 	size_t optlen, len;
 	const char *opts;
+	union flow *flow;
+	flow_sidx_t sidx;
 	int ack_due = 0;
 	int count;
 
@@ -2483,17 +2362,22 @@ int tcp_tap_handler(struct ctx *c, uint8_t pif, sa_family_t af,
 	optlen = MIN(optlen, ((1UL << 4) /* from doff width */ - 6) * 4UL);
 	opts = packet_get(p, idx, sizeof(*th), optlen, NULL);
 
-	conn = tcp_hash_lookup(c, af, saddr, daddr,
-			       ntohs(th->source), ntohs(th->dest));
+	sidx = flow_lookup_af(c, IPPROTO_TCP, PIF_TAP, af, saddr, daddr,
+			      ntohs(th->source), ntohs(th->dest));
+	flow = flow_at_sidx(sidx);
 
 	/* New connection from tap */
-	if (!conn) {
+	if (!flow) {
 		if (opts && th->syn && !th->ack)
 			tcp_conn_from_tap(c, af, saddr, daddr, th,
 					  opts, optlen, now);
 		return 1;
 	}
 
+	ASSERT(flow->f.type == FLOW_TCP);
+	ASSERT(flow->f.pif[sidx.side] == PIF_TAP);
+	conn = &flow->tcp;
+
 	flow_trace(conn, "packet length %zu from tap", len);
 
 	if (th->rst) {
@@ -2676,7 +2560,7 @@ static void tcp_tap_conn_from_sock(struct ctx *c, in_port_t dstport,
 	conn_event(c, conn, SOCK_ACCEPTED);
 
 	tcp_seq_init(c, conn, now);
-	tcp_hash_insert(c, conn);
+	flow_hash_insert(c, TAP_SIDX(conn));
 
 	conn->seq_ack_from_tap = conn->seq_to_tap;
 
@@ -3065,11 +2949,6 @@ static void tcp_sock_refill_init(const struct ctx *c)
  */
 int tcp_init(struct ctx *c)
 {
-	unsigned b;
-
-	for (b = 0; b < TCP_HASH_TABLE_SIZE; b++)
-		tc_hash[b] = FLOW_SIDX_NONE;
-
 	if (c->ifi4)
 		tcp_sock4_iov_init(c);
 
-- 
@@ -307,9 +307,6 @@
 #define TCP_FRAMES							\
 	(c->mode == MODE_PASST ? TCP_FRAMES_MEM : 1)
 
-#define TCP_HASH_TABLE_LOAD		70		/* % */
-#define TCP_HASH_TABLE_SIZE		(FLOW_MAX * 100 / TCP_HASH_TABLE_LOAD)
-
 #define MAX_WS				8
 #define MAX_WINDOW			(1 << (16 + (MAX_WS)))
 
@@ -370,6 +367,7 @@
 
 #define TAPSIDE(conn_)	((conn_)->f.pif[1] == PIF_TAP)
 #define TAPFLOW(conn_)	(&((conn_)->f.side[TAPSIDE(conn_)]))
+#define TAP_SIDX(conn_)	(FLOW_SIDX((conn_), TAPSIDE(conn_)))
 
 #define CONN_V4(conn)		(!!inany_v4(&TAPFLOW(conn)->faddr))
 #define CONN_V6(conn)		(!CONN_V4(conn))
@@ -523,12 +521,6 @@ static struct iovec	tcp_iov			[UIO_MAXIOV];
 
 #define CONN(idx)		(&(FLOW(idx)->tcp))
 
-/* Table for lookup from flowside information */
-static flow_sidx_t tc_hash[TCP_HASH_TABLE_SIZE];
-
-static_assert(ARRAY_SIZE(tc_hash) >= FLOW_MAX,
-	"Safe linear probing requires hash table larger than connection table");
-
 /* Pools for pre-opened sockets (in init) */
 int init_sock_pool4		[TCP_SOCK_POOL_SIZE];
 int init_sock_pool6		[TCP_SOCK_POOL_SIZE];
@@ -722,9 +714,6 @@ static void conn_flag_do(const struct ctx *c, struct tcp_tap_conn *conn,
 		tcp_timer_ctl(c, conn);
 }
 
-static void tcp_hash_remove(const struct ctx *c,
-			    const struct tcp_tap_conn *conn);
-
 /**
  * conn_event_do() - Set and log connection events, update epoll state
  * @c:		Execution context
@@ -770,7 +759,7 @@ static void conn_event_do(const struct ctx *c, struct tcp_tap_conn *conn,
 			 num == -1 	       ? "CLOSED" : tcp_event_str[num]);
 
 	if (event == CLOSED)
-		tcp_hash_remove(c, conn);
+		flow_hash_remove(c, TAP_SIDX(conn));
 	else if ((event == TAP_FIN_RCVD) && !(conn->events & SOCK_FIN_RCVD))
 		conn_flag(c, conn, ACTIVE_CLOSE);
 	else
@@ -1073,118 +1062,6 @@ static int tcp_opt_get(const char *opts, size_t len, uint8_t type_find,
 	return -1;
 }
 
-/**
- * tcp_conn_hash() - Calculate hash bucket of an existing connection
- * @c:		Execution context
- * @conn:	Connection
- *
- * Return: hash value, needs to be adjusted for table size
- */
-static uint64_t tcp_conn_hash(const struct ctx *c,
-			      const struct tcp_tap_conn *conn)
-{
-	const struct flowside *tapside = TAPFLOW(conn);
-
-	return flow_hash(c, IPPROTO_TCP, conn->f.pif[TAPSIDE(conn)], tapside);
-}
-
-/**
- * tcp_hash_probe() - Find hash bucket for a connection
- * @c:		Execution context
- * @conn:	Connection to find bucket for
- *
- * Return: If @conn is in the table, its current bucket, otherwise a suitable
- *         free bucket for it.
- */
-static inline unsigned tcp_hash_probe(const struct ctx *c,
-				      const struct tcp_tap_conn *conn)
-{
-	unsigned b = tcp_conn_hash(c, conn) % TCP_HASH_TABLE_SIZE;
-	flow_sidx_t sidx = FLOW_SIDX(conn, TAPSIDE(conn));
-
-	/* Linear probing */
-	while (!flow_sidx_eq(tc_hash[b], FLOW_SIDX_NONE) &&
-	       !flow_sidx_eq(tc_hash[b], sidx))
-		b = mod_sub(b, 1, TCP_HASH_TABLE_SIZE);
-
-	return b;
-}
-
-/**
- * tcp_hash_insert() - Insert connection into hash table, chain link
- * @c:		Execution context
- * @conn:	Connection pointer
- */
-static void tcp_hash_insert(const struct ctx *c, struct tcp_tap_conn *conn)
-{
-	unsigned b = tcp_hash_probe(c, conn);
-
-	tc_hash[b] = FLOW_SIDX(conn, TAPSIDE(conn));
-	flow_dbg(conn, "hash table insert: sock %i, bucket: %u", conn->sock, b);
-}
-
-/**
- * tcp_hash_remove() - Drop connection from hash table, chain unlink
- * @c:		Execution context
- * @conn:	Connection pointer
- */
-static void tcp_hash_remove(const struct ctx *c,
-			    const struct tcp_tap_conn *conn)
-{
-	unsigned b = tcp_hash_probe(c, conn), s;
-	union flow *flow = flow_at_sidx(tc_hash[b]);
-
-	if (!flow)
-		return; /* Redundant remove */
-
-	flow_dbg(conn, "hash table remove: sock %i, bucket: %u", conn->sock, b);
-
-	/* Scan the remainder of the cluster */
-	for (s = mod_sub(b, 1, TCP_HASH_TABLE_SIZE);
-	     (flow = flow_at_sidx(tc_hash[s]));
-	     s = mod_sub(s, 1, TCP_HASH_TABLE_SIZE)) {
-		unsigned h = tcp_conn_hash(c, &flow->tcp) % TCP_HASH_TABLE_SIZE;
-
-		if (!mod_between(h, s, b, TCP_HASH_TABLE_SIZE)) {
-			/* tc_hash[s] can live in tc_hash[b]'s slot */
-			debug("hash table remove: shuffle %u -> %u", s, b);
-			tc_hash[b] = tc_hash[s];
-			b = s;
-		}
-	}
-
-	tc_hash[b] = FLOW_SIDX_NONE;
-}
-
-/**
- * tcp_hash_lookup() - Look up connection given remote address and ports
- * @c:		Execution context
- * @af:		Address family, AF_INET or AF_INET6
- * @eaddr:	Guest side endpoint address (guest local address)
- * @faddr:	Guest side forwarding address (guest remote address)
- * @eport:	Guest side endpoint port (guest local port)
- * @fport:	Guest side forwarding port (guest remote port)
- *
- * Return: connection pointer, if found, -ENOENT otherwise
- */
-static struct tcp_tap_conn *tcp_hash_lookup(const struct ctx *c, sa_family_t af,
-					    const void *eaddr, const void *faddr,
-					    in_port_t eport, in_port_t fport)
-{
-	struct flowside fside;
-	union flow *flow;
-	unsigned b;
-
-	flowside_from_af(&fside, af, eaddr, eport, faddr, fport);
-
-	b = flow_hash(c, IPPROTO_TCP, PIF_TAP, &fside) % TCP_HASH_TABLE_SIZE;
-	while ((flow = flow_at_sidx(tc_hash[b])) &&
-	       !flowside_eq(&flow->f.side[TAPSIDE(flow)], &fside))
-		b = mod_sub(b, 1, TCP_HASH_TABLE_SIZE);
-
-	return &flow->tcp;
-}
-
 /**
  * tcp_flow_defer() - Deferred per-flow handling (clean up closed connections)
  * @flow:	Flow table entry for this connection
@@ -1972,7 +1849,7 @@ static void tcp_conn_from_tap(struct ctx *c, sa_family_t af,
 	tcp_seq_init(c, conn, now);
 	conn->seq_ack_from_tap = conn->seq_to_tap;
 
-	tcp_hash_insert(c, conn);
+	flow_hash_insert(c, TAP_SIDX(conn));
 
 	sockaddr_from_inany(&sa, &sl, &fwd->eaddr, fwd->eport, c->ifi6);
 
@@ -2468,6 +2345,8 @@ int tcp_tap_handler(struct ctx *c, uint8_t pif, sa_family_t af,
 	const struct tcphdr *th;
 	size_t optlen, len;
 	const char *opts;
+	union flow *flow;
+	flow_sidx_t sidx;
 	int ack_due = 0;
 	int count;
 
@@ -2483,17 +2362,22 @@ int tcp_tap_handler(struct ctx *c, uint8_t pif, sa_family_t af,
 	optlen = MIN(optlen, ((1UL << 4) /* from doff width */ - 6) * 4UL);
 	opts = packet_get(p, idx, sizeof(*th), optlen, NULL);
 
-	conn = tcp_hash_lookup(c, af, saddr, daddr,
-			       ntohs(th->source), ntohs(th->dest));
+	sidx = flow_lookup_af(c, IPPROTO_TCP, PIF_TAP, af, saddr, daddr,
+			      ntohs(th->source), ntohs(th->dest));
+	flow = flow_at_sidx(sidx);
 
 	/* New connection from tap */
-	if (!conn) {
+	if (!flow) {
 		if (opts && th->syn && !th->ack)
 			tcp_conn_from_tap(c, af, saddr, daddr, th,
 					  opts, optlen, now);
 		return 1;
 	}
 
+	ASSERT(flow->f.type == FLOW_TCP);
+	ASSERT(flow->f.pif[sidx.side] == PIF_TAP);
+	conn = &flow->tcp;
+
 	flow_trace(conn, "packet length %zu from tap", len);
 
 	if (th->rst) {
@@ -2676,7 +2560,7 @@ static void tcp_tap_conn_from_sock(struct ctx *c, in_port_t dstport,
 	conn_event(c, conn, SOCK_ACCEPTED);
 
 	tcp_seq_init(c, conn, now);
-	tcp_hash_insert(c, conn);
+	flow_hash_insert(c, TAP_SIDX(conn));
 
 	conn->seq_ack_from_tap = conn->seq_to_tap;
 
@@ -3065,11 +2949,6 @@ static void tcp_sock_refill_init(const struct ctx *c)
  */
 int tcp_init(struct ctx *c)
 {
-	unsigned b;
-
-	for (b = 0; b < TCP_HASH_TABLE_SIZE; b++)
-		tc_hash[b] = FLOW_SIDX_NONE;
-
 	if (c->ifi4)
 		tcp_sock4_iov_init(c);
 
-- 
2.45.0


  parent reply	other threads:[~2024-05-14  1:03 UTC|newest]

Thread overview: 28+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-05-14  1:03 [PATCH v5 00/19] RFC: Unified flow table David Gibson
2024-05-14  1:03 ` [PATCH v5 01/19] flow: Clarify and enforce flow state transitions David Gibson
2024-05-16  9:30   ` Stefano Brivio
     [not found]     ` <ZkbVxtvmP7f0aL1S@zatzit>
2024-05-17 11:00       ` Stefano Brivio
2024-05-18  6:47         ` David Gibson
2024-05-14  1:03 ` [PATCH v5 02/19] flow: Make side 0 always be the initiating side David Gibson
2024-05-16 12:06   ` Stefano Brivio
2024-05-14  1:03 ` [PATCH v5 03/19] flow: Record the pifs for each side of each flow David Gibson
2024-05-14  1:03 ` [PATCH v5 04/19] tcp: Remove interim 'tapside' field from connection David Gibson
2024-05-14  1:03 ` [PATCH v5 05/19] flow: Common data structures for tracking flow addresses David Gibson
2024-05-14  1:03 ` [PATCH v5 06/19] flow: Populate address information for initiating side David Gibson
     [not found]   ` <20240516202337.1b90e5f2@elisabeth>
     [not found]     ` <ZkbcwkdEwjGv6uwG@zatzit>
     [not found]       ` <20240517215845.4d09eaae@elisabeth>
2024-05-18  7:00         ` David Gibson
2024-05-14  1:03 ` [PATCH v5 07/19] flow: Populate address information for non-initiating side David Gibson
2024-05-14  1:03 ` [PATCH v5 08/19] tcp, flow: Remove redundant information, repack connection structures David Gibson
2024-05-14  1:03 ` [PATCH v5 09/19] tcp: Obtain guest address from flowside David Gibson
2024-05-14  1:03 ` [PATCH v5 10/19] tcp: Simplify endpoint validation using flowside information David Gibson
2024-05-14  1:03 ` [PATCH v5 11/19] tcp_splice: Eliminate SPLICE_V6 flag David Gibson
2024-05-14  1:03 ` [PATCH v5 12/19] tcp, flow: Replace TCP specific hash function with general flow hash David Gibson
2024-05-14  1:03 ` David Gibson [this message]
2024-05-14  1:03 ` [PATCH v5 14/19] tcp: Re-use flow hash for initial sequence number generation David Gibson
2024-05-14  1:03 ` [PATCH v5 15/19] icmp: Use flowsides as the source of truth wherever possible David Gibson
     [not found]   ` <20240516225350.06aebcd7@elisabeth>
     [not found]     ` <ZkcAHhCpx3F0SW2K@zatzit>
     [not found]       ` <20240517221123.1c7197a3@elisabeth>
2024-05-18  7:08         ` David Gibson
2024-05-14  1:03 ` [PATCH v5 16/19] icmp: Look up ping flows using flow hash David Gibson
2024-05-14  1:03 ` [PATCH v5 17/19] icmp: Eliminate icmp_id_map David Gibson
2024-05-14  1:03 ` [PATCH v5 18/19] flow, tcp: Flow based NAT and port forwarding for TCP David Gibson
     [not found]   ` <20240518001345.2d127b09@elisabeth>
2024-05-20  5:44     ` David Gibson
2024-05-14  1:03 ` [PATCH v5 19/19] flow, icmp: Use general flow forwarding rules for ICMP David Gibson
     [not found]   ` <20240518001408.004011b2@elisabeth>
2024-05-20  5:56     ` David Gibson

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20240514010337.1104606-14-david@gibson.dropbear.id.au \
    --to=david@gibson.dropbear.id.au \
    --cc=passt-dev@passt.top \
    --cc=sbrivio@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
Code repositories for project(s) associated with this public inbox

	https://passt.top/passt

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for IMAP folder(s).