public inbox for passt-dev@passt.top
 help / color / mirror / code / Atom feed
From: David Gibson <david@gibson.dropbear.id.au>
To: Stefano Brivio <sbrivio@redhat.com>, passt-dev@passt.top
Cc: David Gibson <david@gibson.dropbear.id.au>
Subject: [PATCH v2 3/4] tcp: Implement hash table with indices rather than pointers
Date: Thu,  7 Dec 2023 16:53:52 +1100	[thread overview]
Message-ID: <20231207055353.1245933-4-david@gibson.dropbear.id.au> (raw)
In-Reply-To: <20231207055353.1245933-1-david@gibson.dropbear.id.au>

We implement our hash table with pointers to the entry for each bucket (or
NULL).  However, the entries are always allocated within the flow table,
meaning that a flow index will suffice, halving the size of the hash table.

For TCP, just a flow index would be enough, but future uses will want to
expand the hash table to cover indexing either side of a flow, so use a
flow_sidx_t as the type for each hash bucket.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
---
 flow.h | 11 +++++++++++
 tcp.c  | 33 ++++++++++++++++++++++-----------
 2 files changed, 33 insertions(+), 11 deletions(-)

diff --git a/flow.h b/flow.h
index c2a5190..959b461 100644
--- a/flow.h
+++ b/flow.h
@@ -53,6 +53,17 @@ static_assert(sizeof(flow_sidx_t) <= sizeof(uint32_t),
 
 #define FLOW_SIDX_NONE ((flow_sidx_t){ .flow = FLOW_MAX })
 
+/**
+ * flow_sidx_eq() - Test if two sidx values are equal
+ * @a, @b:	sidx values
+ *
+ * Return: true iff @a and @b refer to the same side of the same flow
+ */
+static inline bool flow_sidx_eq(flow_sidx_t a, flow_sidx_t b)
+{
+	return (a.flow == b.flow) && (a.side == b.side);
+}
+
 union flow;
 
 void flow_table_compact(struct ctx *c, union flow *hole);
diff --git a/tcp.c b/tcp.c
index cd8e64e..8dc9f31 100644
--- a/tcp.c
+++ b/tcp.c
@@ -574,7 +574,7 @@ static unsigned int tcp6_l2_flags_buf_used;
 #define CONN(idx)		(&(FLOW(idx)->tcp))
 
 /* Table for lookup from remote address, local port, remote port */
-static struct tcp_tap_conn *tc_hash[TCP_HASH_TABLE_SIZE];
+static flow_sidx_t tc_hash[TCP_HASH_TABLE_SIZE];
 
 static_assert(ARRAY_SIZE(tc_hash) >= FLOW_MAX,
 	"Safe linear probing requires hash table larger than connection table");
@@ -1197,10 +1197,12 @@ static unsigned int tcp_conn_hash(const struct ctx *c,
 static inline unsigned tcp_hash_probe(const struct ctx *c,
 				      const struct tcp_tap_conn *conn)
 {
+	flow_sidx_t sidx = FLOW_SIDX(conn, TAPSIDE);
 	unsigned b = tcp_conn_hash(c, conn);
 
 	/* Linear probing */
-	while (tc_hash[b] && tc_hash[b] != conn)
+	while (!flow_sidx_eq(tc_hash[b], FLOW_SIDX_NONE) &&
+	       !flow_sidx_eq(tc_hash[b], sidx))
 		b = mod_sub(b, 1, TCP_HASH_TABLE_SIZE);
 
 	return b;
@@ -1215,7 +1217,7 @@ static void tcp_hash_insert(const struct ctx *c, struct tcp_tap_conn *conn)
 {
 	unsigned b = tcp_hash_probe(c, conn);
 
-	tc_hash[b] = conn;
+	tc_hash[b] = FLOW_SIDX(conn, TAPSIDE);
 	flow_dbg(conn, "hash table insert: sock %i, bucket: %u", conn->sock, b);
 }
 
@@ -1228,16 +1230,18 @@ static void tcp_hash_remove(const struct ctx *c,
 			    const struct tcp_tap_conn *conn)
 {
 	unsigned b = tcp_hash_probe(c, conn), s;
+	union flow *flow = flow_at_sidx(tc_hash[b]);
 
-	if (!tc_hash[b])
+	if (!flow)
 		return; /* Redundant remove */
 
 	flow_dbg(conn, "hash table remove: sock %i, bucket: %u", conn->sock, b);
 
 	/* Scan the remainder of the cluster */
-	for (s = mod_sub(b, 1, TCP_HASH_TABLE_SIZE); tc_hash[s];
+	for (s = mod_sub(b, 1, TCP_HASH_TABLE_SIZE);
+	     (flow = flow_at_sidx(tc_hash[s]));
 	     s = mod_sub(s, 1, TCP_HASH_TABLE_SIZE)) {
-		unsigned h = tcp_conn_hash(c, tc_hash[s]);
+		unsigned h = tcp_conn_hash(c, &flow->tcp);
 
 		if (!mod_between(h, s, b, TCP_HASH_TABLE_SIZE)) {
 			/* tc_hash[s] can live in tc_hash[b]'s slot */
@@ -1247,7 +1251,7 @@ static void tcp_hash_remove(const struct ctx *c,
 		}
 	}
 
-	tc_hash[b] = NULL;
+	tc_hash[b] = FLOW_SIDX_NONE;
 }
 
 /**
@@ -1262,10 +1266,10 @@ void tcp_tap_conn_update(const struct ctx *c, struct tcp_tap_conn *old,
 {
 	unsigned b = tcp_hash_probe(c, old);
 
-	if (!tc_hash[b])
+	if (!flow_at_sidx(tc_hash[b]))
 		return; /* Not in hash table, nothing to update */
 
-	tc_hash[b] = new;
+	tc_hash[b] = FLOW_SIDX(new, TAPSIDE);
 
 	debug("TCP: hash table update: old index %u, new index %u, sock %i, "
 	      "bucket: %u", FLOW_IDX(old), FLOW_IDX(new), new->sock, b);
@@ -1288,15 +1292,17 @@ static struct tcp_tap_conn *tcp_hash_lookup(const struct ctx *c,
 					    in_port_t eport, in_port_t fport)
 {
 	union inany_addr aany;
+	union flow *flow;
 	unsigned b;
 
 	inany_from_af(&aany, af, faddr);
 
 	b = tcp_hash(c, &aany, eport, fport);
-	while (tc_hash[b] && !tcp_hash_match(tc_hash[b], &aany, eport, fport))
+	while ((flow = flow_at_sidx(tc_hash[b])) &&
+	       !tcp_hash_match(&flow->tcp, &aany, eport, fport))
 		b = mod_sub(b, 1, TCP_HASH_TABLE_SIZE);
 
-	return tc_hash[b];
+	return &flow->tcp;
 }
 
 /**
@@ -3087,6 +3093,11 @@ static void tcp_sock_refill_init(const struct ctx *c)
  */
 int tcp_init(struct ctx *c)
 {
+	unsigned b;
+
+	for (b = 0; b < TCP_HASH_TABLE_SIZE; b++)
+		tc_hash[b] = FLOW_SIDX_NONE;
+
 	if (c->ifi4)
 		tcp_sock4_iov_init(c);
 
-- 
2.43.0


  parent reply	other threads:[~2023-12-07  5:53 UTC|newest]

Thread overview: 6+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-12-07  5:53 [PATCH v2 0/4] TCP hash table changes, in preparation for flow table David Gibson
2023-12-07  5:53 ` [PATCH v2 1/4] tcp: Fix conceptually incorrect byte-order switch in tcp_tap_handler() David Gibson
2023-12-07  5:53 ` [PATCH v2 2/4] tcp: Switch hash table to linear probing instead of chaining David Gibson
2023-12-07  5:53 ` David Gibson [this message]
2023-12-07  5:53 ` [PATCH v2 4/4] tcp: Don't account for hash table size in tcp_hash() David Gibson
2023-12-27 20:23 ` [PATCH v2 0/4] TCP hash table changes, in preparation for flow table Stefano Brivio

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20231207055353.1245933-4-david@gibson.dropbear.id.au \
    --to=david@gibson.dropbear.id.au \
    --cc=passt-dev@passt.top \
    --cc=sbrivio@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
Code repositories for project(s) associated with this public inbox

	https://passt.top/passt

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for IMAP folder(s).