From: David Gibson <david@gibson.dropbear.id.au>
To: Stefano Brivio <sbrivio@redhat.com>, passt-dev@passt.top
Cc: David Gibson <david@gibson.dropbear.id.au>
Subject: [PATCH v2 3/4] tcp: Implement hash table with indices rather than pointers
Date: Thu, 7 Dec 2023 16:53:52 +1100 [thread overview]
Message-ID: <20231207055353.1245933-4-david@gibson.dropbear.id.au> (raw)
In-Reply-To: <20231207055353.1245933-1-david@gibson.dropbear.id.au>
We implement our hash table with pointers to the entry for each bucket (or
NULL). However, the entries are always allocated within the flow table,
meaning that a flow index will suffice, halving the size of the hash table.
For TCP, just a flow index would be enough, but future uses will want to
expand the hash table to cover indexing either side of a flow, so use a
flow_sidx_t as the type for each hash bucket.
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
---
flow.h | 11 +++++++++++
tcp.c | 33 ++++++++++++++++++++++-----------
2 files changed, 33 insertions(+), 11 deletions(-)
diff --git a/flow.h b/flow.h
index c2a5190..959b461 100644
--- a/flow.h
+++ b/flow.h
@@ -53,6 +53,17 @@ static_assert(sizeof(flow_sidx_t) <= sizeof(uint32_t),
#define FLOW_SIDX_NONE ((flow_sidx_t){ .flow = FLOW_MAX })
+/**
+ * flow_sidx_eq() - Test if two sidx values are equal
+ * @a, @b: sidx values
+ *
+ * Return: true iff @a and @b refer to the same side of the same flow
+ */
+static inline bool flow_sidx_eq(flow_sidx_t a, flow_sidx_t b)
+{
+ return (a.flow == b.flow) && (a.side == b.side);
+}
+
union flow;
void flow_table_compact(struct ctx *c, union flow *hole);
diff --git a/tcp.c b/tcp.c
index cd8e64e..8dc9f31 100644
--- a/tcp.c
+++ b/tcp.c
@@ -574,7 +574,7 @@ static unsigned int tcp6_l2_flags_buf_used;
#define CONN(idx) (&(FLOW(idx)->tcp))
/* Table for lookup from remote address, local port, remote port */
-static struct tcp_tap_conn *tc_hash[TCP_HASH_TABLE_SIZE];
+static flow_sidx_t tc_hash[TCP_HASH_TABLE_SIZE];
static_assert(ARRAY_SIZE(tc_hash) >= FLOW_MAX,
"Safe linear probing requires hash table larger than connection table");
@@ -1197,10 +1197,12 @@ static unsigned int tcp_conn_hash(const struct ctx *c,
static inline unsigned tcp_hash_probe(const struct ctx *c,
const struct tcp_tap_conn *conn)
{
+ flow_sidx_t sidx = FLOW_SIDX(conn, TAPSIDE);
unsigned b = tcp_conn_hash(c, conn);
/* Linear probing */
- while (tc_hash[b] && tc_hash[b] != conn)
+ while (!flow_sidx_eq(tc_hash[b], FLOW_SIDX_NONE) &&
+ !flow_sidx_eq(tc_hash[b], sidx))
b = mod_sub(b, 1, TCP_HASH_TABLE_SIZE);
return b;
@@ -1215,7 +1217,7 @@ static void tcp_hash_insert(const struct ctx *c, struct tcp_tap_conn *conn)
{
unsigned b = tcp_hash_probe(c, conn);
- tc_hash[b] = conn;
+ tc_hash[b] = FLOW_SIDX(conn, TAPSIDE);
flow_dbg(conn, "hash table insert: sock %i, bucket: %u", conn->sock, b);
}
@@ -1228,16 +1230,18 @@ static void tcp_hash_remove(const struct ctx *c,
const struct tcp_tap_conn *conn)
{
unsigned b = tcp_hash_probe(c, conn), s;
+ union flow *flow = flow_at_sidx(tc_hash[b]);
- if (!tc_hash[b])
+ if (!flow)
return; /* Redundant remove */
flow_dbg(conn, "hash table remove: sock %i, bucket: %u", conn->sock, b);
/* Scan the remainder of the cluster */
- for (s = mod_sub(b, 1, TCP_HASH_TABLE_SIZE); tc_hash[s];
+ for (s = mod_sub(b, 1, TCP_HASH_TABLE_SIZE);
+ (flow = flow_at_sidx(tc_hash[s]));
s = mod_sub(s, 1, TCP_HASH_TABLE_SIZE)) {
- unsigned h = tcp_conn_hash(c, tc_hash[s]);
+ unsigned h = tcp_conn_hash(c, &flow->tcp);
if (!mod_between(h, s, b, TCP_HASH_TABLE_SIZE)) {
/* tc_hash[s] can live in tc_hash[b]'s slot */
@@ -1247,7 +1251,7 @@ static void tcp_hash_remove(const struct ctx *c,
}
}
- tc_hash[b] = NULL;
+ tc_hash[b] = FLOW_SIDX_NONE;
}
/**
@@ -1262,10 +1266,10 @@ void tcp_tap_conn_update(const struct ctx *c, struct tcp_tap_conn *old,
{
unsigned b = tcp_hash_probe(c, old);
- if (!tc_hash[b])
+ if (!flow_at_sidx(tc_hash[b]))
return; /* Not in hash table, nothing to update */
- tc_hash[b] = new;
+ tc_hash[b] = FLOW_SIDX(new, TAPSIDE);
debug("TCP: hash table update: old index %u, new index %u, sock %i, "
"bucket: %u", FLOW_IDX(old), FLOW_IDX(new), new->sock, b);
@@ -1288,15 +1292,17 @@ static struct tcp_tap_conn *tcp_hash_lookup(const struct ctx *c,
in_port_t eport, in_port_t fport)
{
union inany_addr aany;
+ union flow *flow;
unsigned b;
inany_from_af(&aany, af, faddr);
b = tcp_hash(c, &aany, eport, fport);
- while (tc_hash[b] && !tcp_hash_match(tc_hash[b], &aany, eport, fport))
+ while ((flow = flow_at_sidx(tc_hash[b])) &&
+ !tcp_hash_match(&flow->tcp, &aany, eport, fport))
b = mod_sub(b, 1, TCP_HASH_TABLE_SIZE);
- return tc_hash[b];
+ return &flow->tcp;
}
/**
@@ -3087,6 +3093,11 @@ static void tcp_sock_refill_init(const struct ctx *c)
*/
int tcp_init(struct ctx *c)
{
+ unsigned b;
+
+ for (b = 0; b < TCP_HASH_TABLE_SIZE; b++)
+ tc_hash[b] = FLOW_SIDX_NONE;
+
if (c->ifi4)
tcp_sock4_iov_init(c);
--
@@ -574,7 +574,7 @@ static unsigned int tcp6_l2_flags_buf_used;
#define CONN(idx) (&(FLOW(idx)->tcp))
/* Table for lookup from remote address, local port, remote port */
-static struct tcp_tap_conn *tc_hash[TCP_HASH_TABLE_SIZE];
+static flow_sidx_t tc_hash[TCP_HASH_TABLE_SIZE];
static_assert(ARRAY_SIZE(tc_hash) >= FLOW_MAX,
"Safe linear probing requires hash table larger than connection table");
@@ -1197,10 +1197,12 @@ static unsigned int tcp_conn_hash(const struct ctx *c,
static inline unsigned tcp_hash_probe(const struct ctx *c,
const struct tcp_tap_conn *conn)
{
+ flow_sidx_t sidx = FLOW_SIDX(conn, TAPSIDE);
unsigned b = tcp_conn_hash(c, conn);
/* Linear probing */
- while (tc_hash[b] && tc_hash[b] != conn)
+ while (!flow_sidx_eq(tc_hash[b], FLOW_SIDX_NONE) &&
+ !flow_sidx_eq(tc_hash[b], sidx))
b = mod_sub(b, 1, TCP_HASH_TABLE_SIZE);
return b;
@@ -1215,7 +1217,7 @@ static void tcp_hash_insert(const struct ctx *c, struct tcp_tap_conn *conn)
{
unsigned b = tcp_hash_probe(c, conn);
- tc_hash[b] = conn;
+ tc_hash[b] = FLOW_SIDX(conn, TAPSIDE);
flow_dbg(conn, "hash table insert: sock %i, bucket: %u", conn->sock, b);
}
@@ -1228,16 +1230,18 @@ static void tcp_hash_remove(const struct ctx *c,
const struct tcp_tap_conn *conn)
{
unsigned b = tcp_hash_probe(c, conn), s;
+ union flow *flow = flow_at_sidx(tc_hash[b]);
- if (!tc_hash[b])
+ if (!flow)
return; /* Redundant remove */
flow_dbg(conn, "hash table remove: sock %i, bucket: %u", conn->sock, b);
/* Scan the remainder of the cluster */
- for (s = mod_sub(b, 1, TCP_HASH_TABLE_SIZE); tc_hash[s];
+ for (s = mod_sub(b, 1, TCP_HASH_TABLE_SIZE);
+ (flow = flow_at_sidx(tc_hash[s]));
s = mod_sub(s, 1, TCP_HASH_TABLE_SIZE)) {
- unsigned h = tcp_conn_hash(c, tc_hash[s]);
+ unsigned h = tcp_conn_hash(c, &flow->tcp);
if (!mod_between(h, s, b, TCP_HASH_TABLE_SIZE)) {
/* tc_hash[s] can live in tc_hash[b]'s slot */
@@ -1247,7 +1251,7 @@ static void tcp_hash_remove(const struct ctx *c,
}
}
- tc_hash[b] = NULL;
+ tc_hash[b] = FLOW_SIDX_NONE;
}
/**
@@ -1262,10 +1266,10 @@ void tcp_tap_conn_update(const struct ctx *c, struct tcp_tap_conn *old,
{
unsigned b = tcp_hash_probe(c, old);
- if (!tc_hash[b])
+ if (!flow_at_sidx(tc_hash[b]))
return; /* Not in hash table, nothing to update */
- tc_hash[b] = new;
+ tc_hash[b] = FLOW_SIDX(new, TAPSIDE);
debug("TCP: hash table update: old index %u, new index %u, sock %i, "
"bucket: %u", FLOW_IDX(old), FLOW_IDX(new), new->sock, b);
@@ -1288,15 +1292,17 @@ static struct tcp_tap_conn *tcp_hash_lookup(const struct ctx *c,
in_port_t eport, in_port_t fport)
{
union inany_addr aany;
+ union flow *flow;
unsigned b;
inany_from_af(&aany, af, faddr);
b = tcp_hash(c, &aany, eport, fport);
- while (tc_hash[b] && !tcp_hash_match(tc_hash[b], &aany, eport, fport))
+ while ((flow = flow_at_sidx(tc_hash[b])) &&
+ !tcp_hash_match(&flow->tcp, &aany, eport, fport))
b = mod_sub(b, 1, TCP_HASH_TABLE_SIZE);
- return tc_hash[b];
+ return &flow->tcp;
}
/**
@@ -3087,6 +3093,11 @@ static void tcp_sock_refill_init(const struct ctx *c)
*/
int tcp_init(struct ctx *c)
{
+ unsigned b;
+
+ for (b = 0; b < TCP_HASH_TABLE_SIZE; b++)
+ tc_hash[b] = FLOW_SIDX_NONE;
+
if (c->ifi4)
tcp_sock4_iov_init(c);
--
2.43.0
next prev parent reply other threads:[~2023-12-07 5:53 UTC|newest]
Thread overview: 6+ messages / expand[flat|nested] mbox.gz Atom feed top
2023-12-07 5:53 [PATCH v2 0/4] TCP hash table changes, in preparation for flow table David Gibson
2023-12-07 5:53 ` [PATCH v2 1/4] tcp: Fix conceptually incorrect byte-order switch in tcp_tap_handler() David Gibson
2023-12-07 5:53 ` [PATCH v2 2/4] tcp: Switch hash table to linear probing instead of chaining David Gibson
2023-12-07 5:53 ` David Gibson [this message]
2023-12-07 5:53 ` [PATCH v2 4/4] tcp: Don't account for hash table size in tcp_hash() David Gibson
2023-12-27 20:23 ` [PATCH v2 0/4] TCP hash table changes, in preparation for flow table Stefano Brivio
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20231207055353.1245933-4-david@gibson.dropbear.id.au \
--to=david@gibson.dropbear.id.au \
--cc=passt-dev@passt.top \
--cc=sbrivio@redhat.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
Code repositories for project(s) associated with this public inbox
https://passt.top/passt
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for IMAP folder(s).