From: David Gibson <david@gibson.dropbear.id.au>
To: Stefano Brivio <sbrivio@redhat.com>, passt-dev@passt.top
Cc: jmaloy@redhat.com, David Gibson <david@gibson.dropbear.id.au>
Subject: [PATCH v7 08/27] tcp, flow: Replace TCP specific hash function with general flow hash
Date: Fri, 5 Jul 2024 12:07:05 +1000 [thread overview]
Message-ID: <20240705020724.3447719-9-david@gibson.dropbear.id.au> (raw)
In-Reply-To: <20240705020724.3447719-1-david@gibson.dropbear.id.au>
Currently we match TCP packets received on the tap connection to a TCP
connection via a hash table based on the forwarding address and both
ports. We hope in the future to allow for multiple guest-side addresses,
or for multiple interfaces, which means we may need to distinguish based
on the endpoint address and pif as well. We also want a unified hash
table to cover multiple protocols, not just TCP.
Replace the TCP-specific hash function with one suitable for general flows,
or rather for one side of a general flow. The new hash covers all the
information from struct flowside, plus the pif and the L4 protocol number.
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
---
flow.c | 35 +++++++++++++++++++++++++++---
flow.h | 19 ++++++++++++++++
flow_table.h | 3 +++
tcp.c | 61 ++++++++++------------------------------------------
4 files changed, 65 insertions(+), 53 deletions(-)
diff --git a/flow.c b/flow.c
index f064fad1..30d10e9d 100644
--- a/flow.c
+++ b/flow.c
@@ -116,9 +116,9 @@ static struct timespec flow_timer_run;
* @faddr: Forwarding address (pointer to in_addr or in6_addr)
* @fport: Forwarding port
*/
-static void flowside_from_af(struct flowside *fside, sa_family_t af,
- const void *eaddr, in_port_t eport,
- const void *faddr, in_port_t fport)
+void flowside_from_af(struct flowside *fside, sa_family_t af,
+ const void *eaddr, in_port_t eport,
+ const void *faddr, in_port_t fport)
{
if (faddr)
inany_from_af(&fside->faddr, af, faddr);
@@ -401,6 +401,35 @@ void flow_alloc_cancel(union flow *flow)
flow_new_entry = NULL;
}
+/**
+ * flow_hash() - Calculate hash value for one side of a flow
+ * @c: Execution context
+ * @proto: Protocol of this flow (IP L4 protocol number)
+ * @pif: pif of the side to hash
+ * @fside: Flowside (must not have unspecified parts)
+ *
+ * Return: hash value
+ */
+uint64_t flow_hash(const struct ctx *c, uint8_t proto, uint8_t pif,
+ const struct flowside *fside)
+{
+ struct siphash_state state = SIPHASH_INIT(c->hash_secret);
+
+ /* For the hash table to work, we need complete endpoint information,
+ * and at least a forwarding port.
+ */
+ ASSERT(pif != PIF_NONE && !inany_is_unspecified(&fside->eaddr) &&
+ fside->eport != 0 && fside->fport != 0);
+
+ inany_siphash_feed(&state, &fside->faddr);
+ inany_siphash_feed(&state, &fside->eaddr);
+
+ return siphash_final(&state, 38, (uint64_t)proto << 40 |
+ (uint64_t)pif << 32 |
+ (uint64_t)fside->fport << 16 |
+ (uint64_t)fside->eport);
+}
+
/**
* flow_defer_handler() - Handler for per-flow deferred and timed tasks
* @c: Execution context
diff --git a/flow.h b/flow.h
index 4c3762b9..0b1e5de2 100644
--- a/flow.h
+++ b/flow.h
@@ -149,6 +149,25 @@ struct flowside {
in_port_t eport;
};
+/**
+ * flowside_eq() - Check if two flowsides are equal
+ * @left, @right: Flowsides to compare
+ *
+ * Return: true if equal, false otherwise
+ */
+static inline bool flowside_eq(const struct flowside *left,
+ const struct flowside *right)
+{
+ return inany_equals(&left->eaddr, &right->eaddr) &&
+ left->eport == right->eport &&
+ inany_equals(&left->faddr, &right->faddr) &&
+ left->fport == right->fport;
+}
+
+void flowside_from_af(struct flowside *fside, sa_family_t af,
+ const void *eaddr, in_port_t eport,
+ const void *faddr, in_port_t fport);
+
/**
* struct flow_common - Common fields for packet flows
* @state: State of the flow table entry
diff --git a/flow_table.h b/flow_table.h
index 00dca4b2..9bfa1174 100644
--- a/flow_table.h
+++ b/flow_table.h
@@ -126,4 +126,7 @@ void flow_activate(struct flow_common *f);
#define FLOW_ACTIVATE(flow_) \
(flow_activate(&(flow_)->f))
+uint64_t flow_hash(const struct ctx *c, uint8_t proto, uint8_t pif,
+ const struct flowside *fside);
+
#endif /* FLOW_TABLE_H */
diff --git a/tcp.c b/tcp.c
index 45ea9a71..b1ad1014 100644
--- a/tcp.c
+++ b/tcp.c
@@ -376,7 +376,7 @@ static struct iovec tcp_iov [UIO_MAXIOV];
#define CONN(idx) (&(FLOW(idx)->tcp))
-/* Table for lookup from remote address, local port, remote port */
+/* Table for lookup from flowside information */
static flow_sidx_t tc_hash[TCP_HASH_TABLE_SIZE];
static_assert(ARRAY_SIZE(tc_hash) >= FLOW_MAX,
@@ -814,46 +814,6 @@ static int tcp_opt_get(const char *opts, size_t len, uint8_t type_find,
return -1;
}
-/**
- * tcp_hash_match() - Check if a connection entry matches address and ports
- * @conn: Connection entry to match against
- * @faddr: Guest side forwarding address
- * @eport: Guest side endpoint port
- * @fport: Guest side forwarding port
- *
- * Return: 1 on match, 0 otherwise
- */
-static int tcp_hash_match(const struct tcp_tap_conn *conn,
- const union inany_addr *faddr,
- in_port_t eport, in_port_t fport)
-{
- const struct flowside *tapside = TAPFLOW(conn);
-
- if (inany_equals(&tapside->faddr, faddr) &&
- tapside->eport == eport && tapside->fport == fport)
- return 1;
-
- return 0;
-}
-
-/**
- * tcp_hash() - Calculate hash value for connection given address and ports
- * @c: Execution context
- * @faddr: Guest side forwarding address
- * @eport: Guest side endpoint port
- * @fport: Guest side forwarding port
- *
- * Return: hash value, needs to be adjusted for table size
- */
-static uint64_t tcp_hash(const struct ctx *c, const union inany_addr *faddr,
- in_port_t eport, in_port_t fport)
-{
- struct siphash_state state = SIPHASH_INIT(c->hash_secret);
-
- inany_siphash_feed(&state, faddr);
- return siphash_final(&state, 20, (uint64_t)eport << 16 | fport);
-}
-
/**
* tcp_conn_hash() - Calculate hash bucket of an existing connection
* @c: Execution context
@@ -866,8 +826,7 @@ static uint64_t tcp_conn_hash(const struct ctx *c,
{
const struct flowside *tapside = TAPFLOW(conn);
- return tcp_hash(c, &tapside->faddr, tapside->eport,
- tapside->fport);
+ return flow_hash(c, IPPROTO_TCP, conn->f.pif[TAPSIDE(conn)], tapside);
}
/**
@@ -941,25 +900,26 @@ static void tcp_hash_remove(const struct ctx *c,
* tcp_hash_lookup() - Look up connection given remote address and ports
* @c: Execution context
* @af: Address family, AF_INET or AF_INET6
+ * @eaddr: Guest side endpoint address (guest local address)
* @faddr: Guest side forwarding address (guest remote address)
* @eport: Guest side endpoint port (guest local port)
* @fport: Guest side forwarding port (guest remote port)
*
* Return: connection pointer, if found, -ENOENT otherwise
*/
-static struct tcp_tap_conn *tcp_hash_lookup(const struct ctx *c,
- sa_family_t af, const void *faddr,
+static struct tcp_tap_conn *tcp_hash_lookup(const struct ctx *c, sa_family_t af,
+ const void *eaddr, const void *faddr,
in_port_t eport, in_port_t fport)
{
- union inany_addr aany;
+ struct flowside fside;
union flow *flow;
unsigned b;
- inany_from_af(&aany, af, faddr);
+ flowside_from_af(&fside, af, eaddr, eport, faddr, fport);
- b = tcp_hash(c, &aany, eport, fport) % TCP_HASH_TABLE_SIZE;
+ b = flow_hash(c, IPPROTO_TCP, PIF_TAP, &fside) % TCP_HASH_TABLE_SIZE;
while ((flow = flow_at_sidx(tc_hash[b])) &&
- !tcp_hash_match(&flow->tcp, &aany, eport, fport))
+ !flowside_eq(&flow->f.side[TAPSIDE(flow)], &fside))
b = mod_sub(b, 1, TCP_HASH_TABLE_SIZE);
return &flow->tcp;
@@ -2047,7 +2007,8 @@ int tcp_tap_handler(struct ctx *c, uint8_t pif, sa_family_t af,
optlen = MIN(optlen, ((1UL << 4) /* from doff width */ - 6) * 4UL);
opts = packet_get(p, idx, sizeof(*th), optlen, NULL);
- conn = tcp_hash_lookup(c, af, daddr, ntohs(th->source), ntohs(th->dest));
+ conn = tcp_hash_lookup(c, af, saddr, daddr,
+ ntohs(th->source), ntohs(th->dest));
/* New connection from tap */
if (!conn) {
--
@@ -376,7 +376,7 @@ static struct iovec tcp_iov [UIO_MAXIOV];
#define CONN(idx) (&(FLOW(idx)->tcp))
-/* Table for lookup from remote address, local port, remote port */
+/* Table for lookup from flowside information */
static flow_sidx_t tc_hash[TCP_HASH_TABLE_SIZE];
static_assert(ARRAY_SIZE(tc_hash) >= FLOW_MAX,
@@ -814,46 +814,6 @@ static int tcp_opt_get(const char *opts, size_t len, uint8_t type_find,
return -1;
}
-/**
- * tcp_hash_match() - Check if a connection entry matches address and ports
- * @conn: Connection entry to match against
- * @faddr: Guest side forwarding address
- * @eport: Guest side endpoint port
- * @fport: Guest side forwarding port
- *
- * Return: 1 on match, 0 otherwise
- */
-static int tcp_hash_match(const struct tcp_tap_conn *conn,
- const union inany_addr *faddr,
- in_port_t eport, in_port_t fport)
-{
- const struct flowside *tapside = TAPFLOW(conn);
-
- if (inany_equals(&tapside->faddr, faddr) &&
- tapside->eport == eport && tapside->fport == fport)
- return 1;
-
- return 0;
-}
-
-/**
- * tcp_hash() - Calculate hash value for connection given address and ports
- * @c: Execution context
- * @faddr: Guest side forwarding address
- * @eport: Guest side endpoint port
- * @fport: Guest side forwarding port
- *
- * Return: hash value, needs to be adjusted for table size
- */
-static uint64_t tcp_hash(const struct ctx *c, const union inany_addr *faddr,
- in_port_t eport, in_port_t fport)
-{
- struct siphash_state state = SIPHASH_INIT(c->hash_secret);
-
- inany_siphash_feed(&state, faddr);
- return siphash_final(&state, 20, (uint64_t)eport << 16 | fport);
-}
-
/**
* tcp_conn_hash() - Calculate hash bucket of an existing connection
* @c: Execution context
@@ -866,8 +826,7 @@ static uint64_t tcp_conn_hash(const struct ctx *c,
{
const struct flowside *tapside = TAPFLOW(conn);
- return tcp_hash(c, &tapside->faddr, tapside->eport,
- tapside->fport);
+ return flow_hash(c, IPPROTO_TCP, conn->f.pif[TAPSIDE(conn)], tapside);
}
/**
@@ -941,25 +900,26 @@ static void tcp_hash_remove(const struct ctx *c,
* tcp_hash_lookup() - Look up connection given remote address and ports
* @c: Execution context
* @af: Address family, AF_INET or AF_INET6
+ * @eaddr: Guest side endpoint address (guest local address)
* @faddr: Guest side forwarding address (guest remote address)
* @eport: Guest side endpoint port (guest local port)
* @fport: Guest side forwarding port (guest remote port)
*
* Return: connection pointer, if found, -ENOENT otherwise
*/
-static struct tcp_tap_conn *tcp_hash_lookup(const struct ctx *c,
- sa_family_t af, const void *faddr,
+static struct tcp_tap_conn *tcp_hash_lookup(const struct ctx *c, sa_family_t af,
+ const void *eaddr, const void *faddr,
in_port_t eport, in_port_t fport)
{
- union inany_addr aany;
+ struct flowside fside;
union flow *flow;
unsigned b;
- inany_from_af(&aany, af, faddr);
+ flowside_from_af(&fside, af, eaddr, eport, faddr, fport);
- b = tcp_hash(c, &aany, eport, fport) % TCP_HASH_TABLE_SIZE;
+ b = flow_hash(c, IPPROTO_TCP, PIF_TAP, &fside) % TCP_HASH_TABLE_SIZE;
while ((flow = flow_at_sidx(tc_hash[b])) &&
- !tcp_hash_match(&flow->tcp, &aany, eport, fport))
+ !flowside_eq(&flow->f.side[TAPSIDE(flow)], &fside))
b = mod_sub(b, 1, TCP_HASH_TABLE_SIZE);
return &flow->tcp;
@@ -2047,7 +2007,8 @@ int tcp_tap_handler(struct ctx *c, uint8_t pif, sa_family_t af,
optlen = MIN(optlen, ((1UL << 4) /* from doff width */ - 6) * 4UL);
opts = packet_get(p, idx, sizeof(*th), optlen, NULL);
- conn = tcp_hash_lookup(c, af, daddr, ntohs(th->source), ntohs(th->dest));
+ conn = tcp_hash_lookup(c, af, saddr, daddr,
+ ntohs(th->source), ntohs(th->dest));
/* New connection from tap */
if (!conn) {
--
2.45.2
next prev parent reply other threads:[~2024-07-05 2:07 UTC|newest]
Thread overview: 59+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-07-05 2:06 [PATCH v7 00/27] Unified flow table David Gibson
2024-07-05 2:06 ` [PATCH v7 01/27] flow: Common address information for initiating side David Gibson
2024-07-05 2:06 ` [PATCH v7 02/27] flow: Common address information for target side David Gibson
2024-07-10 21:30 ` Stefano Brivio
2024-07-11 0:19 ` David Gibson
2024-07-05 2:07 ` [PATCH v7 03/27] tcp, flow: Remove redundant information, repack connection structures David Gibson
2024-07-05 2:07 ` [PATCH v7 04/27] tcp: Obtain guest address from flowside David Gibson
2024-07-05 2:07 ` [PATCH v7 05/27] tcp: Manage outbound address via flow table David Gibson
2024-07-05 2:07 ` [PATCH v7 06/27] tcp: Simplify endpoint validation using flowside information David Gibson
2024-07-05 2:07 ` [PATCH v7 07/27] tcp_splice: Eliminate SPLICE_V6 flag David Gibson
2024-07-05 2:07 ` David Gibson [this message]
2024-07-05 2:07 ` [PATCH v7 09/27] flow, tcp: Generalise TCP hash table to general flow hash table David Gibson
2024-07-05 2:07 ` [PATCH v7 10/27] tcp: Re-use flow hash for initial sequence number generation David Gibson
2024-07-05 2:07 ` [PATCH v7 11/27] icmp: Remove redundant id field from flow table entry David Gibson
2024-07-05 2:07 ` [PATCH v7 12/27] icmp: Obtain destination addresses from the flowsides David Gibson
2024-07-05 2:07 ` [PATCH v7 13/27] icmp: Look up ping flows using flow hash David Gibson
2024-07-05 2:07 ` [PATCH v7 14/27] icmp: Eliminate icmp_id_map David Gibson
2024-07-05 2:07 ` [PATCH v7 15/27] flow: Helper to create sockets based on flowside David Gibson
2024-07-10 21:32 ` Stefano Brivio
2024-07-11 0:21 ` David Gibson
2024-07-11 0:27 ` David Gibson
2024-07-05 2:07 ` [PATCH v7 16/27] icmp: Manage outbound socket address via flow table David Gibson
2024-07-05 2:07 ` [PATCH v7 17/27] flow, tcp: Flow based NAT and port forwarding for TCP David Gibson
2024-07-05 2:07 ` [PATCH v7 18/27] flow, icmp: Use general flow forwarding rules for ICMP David Gibson
2024-07-05 2:07 ` [PATCH v7 19/27] fwd: Update flow forwarding logic for UDP David Gibson
2024-07-08 21:26 ` Stefano Brivio
2024-07-09 0:19 ` David Gibson
2024-07-05 2:07 ` [PATCH v7 20/27] udp: Create flows for datagrams from originating sockets David Gibson
2024-07-09 22:32 ` Stefano Brivio
2024-07-09 23:59 ` David Gibson
2024-07-10 21:35 ` Stefano Brivio
2024-07-11 4:26 ` David Gibson
2024-07-11 8:20 ` Stefano Brivio
2024-07-11 22:58 ` David Gibson
2024-07-12 8:21 ` Stefano Brivio
2024-07-15 4:06 ` David Gibson
2024-07-15 16:37 ` Stefano Brivio
2024-07-17 0:49 ` David Gibson
2024-07-05 2:07 ` [PATCH v7 21/27] udp: Handle "spliced" datagrams with per-flow sockets David Gibson
2024-07-09 22:32 ` Stefano Brivio
2024-07-10 0:23 ` David Gibson
2024-07-10 17:13 ` Stefano Brivio
2024-07-11 1:30 ` David Gibson
2024-07-11 8:23 ` Stefano Brivio
2024-07-11 2:48 ` David Gibson
2024-07-12 13:34 ` Stefano Brivio
2024-07-15 4:32 ` David Gibson
2024-07-05 2:07 ` [PATCH v7 22/27] udp: Remove obsolete splice tracking David Gibson
2024-07-10 21:36 ` Stefano Brivio
2024-07-11 0:43 ` David Gibson
2024-07-05 2:07 ` [PATCH v7 23/27] udp: Find or create flows for datagrams from tap interface David Gibson
2024-07-10 21:36 ` Stefano Brivio
2024-07-11 0:45 ` David Gibson
2024-07-05 2:07 ` [PATCH v7 24/27] udp: Direct datagrams from host to guest via flow table David Gibson
2024-07-10 21:37 ` Stefano Brivio
2024-07-11 0:46 ` David Gibson
2024-07-05 2:07 ` [PATCH v7 25/27] udp: Remove obsolete socket tracking David Gibson
2024-07-05 2:07 ` [PATCH v7 26/27] udp: Remove rdelta port forwarding maps David Gibson
2024-07-05 2:07 ` [PATCH v7 27/27] udp: Rename UDP listening sockets David Gibson
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20240705020724.3447719-9-david@gibson.dropbear.id.au \
--to=david@gibson.dropbear.id.au \
--cc=jmaloy@redhat.com \
--cc=passt-dev@passt.top \
--cc=sbrivio@redhat.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
Code repositories for project(s) associated with this public inbox
https://passt.top/passt
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for IMAP folder(s).