From: David Gibson <david@gibson.dropbear.id.au>
To: Stefano Brivio <sbrivio@redhat.com>, passt-dev@passt.top
Cc: David Gibson <david@gibson.dropbear.id.au>
Subject: [PATCH v5 03/19] flow: Record the pifs for each side of each flow
Date: Tue, 14 May 2024 11:03:21 +1000 [thread overview]
Message-ID: <20240514010337.1104606-4-david@gibson.dropbear.id.au> (raw)
In-Reply-To: <20240514010337.1104606-1-david@gibson.dropbear.id.au>
Currently we have no generic information about flows apart from the type and
state; everything else is specific to the flow type. Start introducing
generic flow information by recording the pifs which the flow connects.
To keep track of what information is valid, introduce new flow states:
INI for when the initiating side information is complete, and FWD for when
both sides' information is complete. For now, these states seem like busy
work, but they'll become more important as we add more generic information.
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
---
flow.c | 56 +++++++++++++++++++++++++++++++++++++++++++++++-----
flow.h | 49 ++++++++++++++++++++++++++++++++++++++-------
flow_table.h | 3 +++
icmp.c | 2 ++
pif.h | 1 -
tcp.c | 10 +++++++++-
tcp_splice.c | 1 +
7 files changed, 108 insertions(+), 14 deletions(-)
diff --git a/flow.c b/flow.c
index 7456021..aee2736 100644
--- a/flow.c
+++ b/flow.c
@@ -21,6 +21,8 @@
const char *flow_state_str[] = {
[FLOW_STATE_FREE] = "FREE",
[FLOW_STATE_NEW] = "NEW",
+ [FLOW_STATE_INI] = "INI",
+ [FLOW_STATE_FWD] = "FWD",
[FLOW_STATE_TYPED] = "TYPED",
[FLOW_STATE_ACTIVE] = "ACTIVE",
};
@@ -146,22 +148,63 @@ static void flow_set_state(struct flow_common *f, enum flow_state state)
f->state = state;
flow_log_(f, LOG_DEBUG, "%s -> %s", flow_state_str[oldstate],
FLOW_STATE(f));
+
+ if (MAX(state, oldstate) >= FLOW_STATE_FWD)
+ flow_log_(f, LOG_DEBUG, "%s => %s", pif_name(f->pif[INISIDE]),
+ pif_name(f->pif[FWDSIDE]));
+ else if (MAX(state, oldstate) >= FLOW_STATE_INI)
+ flow_log_(f, LOG_DEBUG, "%s => ?", pif_name(f->pif[INISIDE]));
}
/**
- * flow_set_type() - Set type and mvoe to TYPED state
+ * flow_initiate() - Move flow to INI state, setting INISIDE details
* @flow: Flow to change state
- * @type: Type for new flow
- *
- * Return: @flow
+ * @pif: pif of the initiating side
+ */
+void flow_initiate(union flow *flow, uint8_t pif)
+{
+ struct flow_common *f = &flow->f;
+
+ ASSERT(pif != PIF_NONE);
+ ASSERT(flow_new_entry == flow && f->state == FLOW_STATE_NEW);
+ ASSERT(f->type == FLOW_TYPE_NONE);
+ ASSERT(f->pif[INISIDE] == PIF_NONE && f->pif[FWDSIDE] == PIF_NONE);
+
+ f->pif[INISIDE] = pif;
+ flow_set_state(f, FLOW_STATE_INI);
+}
+
+/**
+ * flow_forward() - Move flow to FWD state, setting FWDSIDE details
+ * @flow: Flow to change state
+ * @pif: pif of the forwarded side
+ */
+void flow_forward(union flow *flow, uint8_t pif)
+{
+ struct flow_common *f = &flow->f;
+
+ ASSERT(pif != PIF_NONE);
+ ASSERT(flow_new_entry == flow && f->state == FLOW_STATE_INI);
+ ASSERT(f->type == FLOW_TYPE_NONE);
+ ASSERT(f->pif[INISIDE] != PIF_NONE && f->pif[FWDSIDE] == PIF_NONE);
+
+ f->pif[FWDSIDE] = pif;
+ flow_set_state(f, FLOW_STATE_FWD);
+}
+
+/**
+ * flow_set_type() - Set type and move to TYPED state
+ * @flow: Flow to change state
+ * @type: Type for new flow
*/
union flow *flow_set_type(union flow *flow, enum flow_type type)
{
struct flow_common *f = &flow->f;
ASSERT(type != FLOW_TYPE_NONE);
- ASSERT(flow_new_entry == flow && f->state == FLOW_STATE_NEW);
+ ASSERT(flow_new_entry == flow && f->state == FLOW_STATE_FWD);
ASSERT(f->type == FLOW_TYPE_NONE);
+ ASSERT(f->pif[INISIDE] != PIF_NONE && f->pif[FWDSIDE] != PIF_NONE);
f->type = type;
flow_set_state(f, FLOW_STATE_TYPED);
@@ -175,6 +218,7 @@ union flow *flow_set_type(union flow *flow, enum flow_type type)
void flow_activate(struct flow_common *f)
{
ASSERT(&flow_new_entry->f == f && f->state == FLOW_STATE_TYPED);
+ ASSERT(f->pif[INISIDE] != PIF_NONE && f->pif[FWDSIDE] != PIF_NONE);
flow_set_state(f, FLOW_STATE_ACTIVE);
flow_new_entry = NULL;
@@ -234,6 +278,8 @@ void flow_alloc_cancel(union flow *flow)
{
ASSERT(flow_new_entry == flow);
ASSERT(flow->f.state == FLOW_STATE_NEW ||
+ flow->f.state == FLOW_STATE_INI ||
+ flow->f.state == FLOW_STATE_FWD ||
flow->f.state == FLOW_STATE_TYPED);
ASSERT(flow_first_free > FLOW_IDX(flow));
diff --git a/flow.h b/flow.h
index 28169a8..9871e3b 100644
--- a/flow.h
+++ b/flow.h
@@ -25,25 +25,56 @@
* NEW - Freshly allocated, uninitialised entry
* Operations:
* - flow_alloc_cancel() returns the entry to FREE state
+ * - flow_initiate() sets the entry's INISIDE details and moves to
+ * INI state
* - FLOW_SET_TYPE() sets the entry's type and moves to TYPED state
* Caveats:
* - No fields other than state may be accessed.
- * - At most one entry may be in NEW or TYPED state at a time, so it's
- * unsafe to use flow_alloc() again until this entry moves to
- * ACTIVE or FREE state
+ * - At most one entry may be in NEW, INI, FWD or TYPED state at a
+ * time, so it's unsafe to use flow_alloc() again until this entry
+ * moves to ACTIVE or FREE state
* - You may not return to the main epoll loop while an entry is in
* NEW state.
*
+ * INI - An entry with INISIDE common information completed
+ * Operations:
+ * - Common fields related to INISIDE may be read
+ * - flow_alloc_cancel() returns the entry to FREE state
+ * - flow_forward() sets the entry's FWDSIDE details and moves to FWD
+ * state
+ * Caveats:
+ * - Other common fields may not be read
+ * - Type specific fields may not be read or written
+ * - At most one entry may be in NEW, INI, FWD or TYPED state at a
+ * time, so it's unsafe to use flow_alloc() again until this entry
+ * moves to ACTIVE or FREE state
+ * - You may not return to the main epoll loop while an entry is in
+ * INI state.
+ *
+ * FWD - An entry with only INISIDE and FWDSIDE common information completed
+ * Operations:
+ * - Common fields related to INISIDE & FWDSIDE may be read
+ * - flow_alloc_cancel() returns the entry to FREE state
+ * - FLOW_SET_TYPE() sets the entry's type and moves to TYPED state
+ * Caveats:
+ * - Other common fields may not be read
+ * - Type specific fields may not be read or written
+ * - At most one entry may be in NEW, INI, FWD or TYPED state at a
+ * time, so it's unsafe to use flow_alloc() again until this entry
+ * moves to ACTIVE or FREE state
+ * - You may not return to the main epoll loop while an entry is in
+ * FWD state.
+ *
* TYPED - Generic info initialised, type specific initialisation underway
* Operations:
* - All common fields may be read
* - Type specific fields may be read and written
* - flow_alloc_cancel() returns the entry to FREE state
- * - FLOW_ACTIVATE() moves the entry to ACTIVE STATE
+ * - FLOW_ACTIVATE() moves the entry to ACTIVE state
* Caveats:
- * - At most one entry may be in NEW or TYPED state at a time, so it's
- * unsafe to use flow_alloc() again until this entry moves to
- * ACTIVE or FREE state
+ * - At most one entry may be in NEW, INI, FWD or TYPED state at a
+ * time, so it's unsafe to use flow_alloc() again until this entry
+ * moves to ACTIVE or FREE state
* - You may not return to the main epoll loop while an entry is in
* TYPED state.
*
@@ -59,6 +90,8 @@
enum flow_state {
FLOW_STATE_FREE,
FLOW_STATE_NEW,
+ FLOW_STATE_INI,
+ FLOW_STATE_FWD,
FLOW_STATE_TYPED,
FLOW_STATE_ACTIVE,
@@ -104,10 +137,12 @@ extern const uint8_t flow_proto[];
* struct flow_common - Common fields for packet flows
* @state: State of the flow table entry
* @type: Type of packet flow
+ * @pif[]: Interface for each side of the flow
*/
struct flow_common {
uint8_t state;
uint8_t type;
+ uint8_t pif[SIDES];
};
#define FLOW_INDEX_BITS 17 /* 128k - 1 */
diff --git a/flow_table.h b/flow_table.h
index 7c98195..01c9326 100644
--- a/flow_table.h
+++ b/flow_table.h
@@ -107,6 +107,9 @@ static inline flow_sidx_t flow_sidx(const struct flow_common *f,
union flow *flow_alloc(void);
void flow_alloc_cancel(union flow *flow);
+void flow_initiate(union flow *flow, uint8_t pif);
+void flow_forward(union flow *flow, uint8_t pif);
+
union flow *flow_set_type(union flow *flow, enum flow_type type);
#define FLOW_SET_TYPE(flow_, t_, var_) (&flow_set_type((flow_), (t_))->var_)
diff --git a/icmp.c b/icmp.c
index 6df0989..f5b8405 100644
--- a/icmp.c
+++ b/icmp.c
@@ -163,6 +163,8 @@ static struct icmp_ping_flow *icmp_ping_new(const struct ctx *c,
if (!flow)
return NULL;
+ flow_initiate(flow, PIF_TAP);
+ flow_forward(flow, PIF_HOST);
pingf = FLOW_SET_TYPE(flow, flowtype, ping);
pingf->seq = -1;
diff --git a/pif.h b/pif.h
index bd52936..ca85b34 100644
--- a/pif.h
+++ b/pif.h
@@ -38,7 +38,6 @@ static inline const char *pif_type(enum pif_type pt)
return "?";
}
-/* cppcheck-suppress unusedFunction */
static inline const char *pif_name(uint8_t pif)
{
return pif_type(pif);
diff --git a/tcp.c b/tcp.c
index 06401ba..48aae30 100644
--- a/tcp.c
+++ b/tcp.c
@@ -1950,6 +1950,8 @@ static void tcp_conn_from_tap(struct ctx *c, sa_family_t af,
if (!(flow = flow_alloc()))
return;
+ flow_initiate(flow, PIF_TAP);
+
if (af == AF_INET) {
if (IN4_IS_ADDR_UNSPECIFIED(saddr) ||
IN4_IS_ADDR_BROADCAST(saddr) ||
@@ -2002,6 +2004,7 @@ static void tcp_conn_from_tap(struct ctx *c, sa_family_t af,
goto cancel;
}
+ flow_forward(flow, PIF_HOST);
conn = FLOW_SET_TYPE(flow, FLOW_TCP, tcp);
conn->tapside = INISIDE;
conn->sock = s;
@@ -2722,7 +2725,10 @@ static void tcp_tap_conn_from_sock(struct ctx *c, in_port_t dstport,
const union sockaddr_inany *sa,
const struct timespec *now)
{
- struct tcp_tap_conn *conn = FLOW_SET_TYPE(flow, FLOW_TCP, tcp);
+ struct tcp_tap_conn *conn;
+
+ flow_forward(flow, PIF_TAP);
+ conn = FLOW_SET_TYPE(flow, FLOW_TCP, tcp);
conn->tapside = FWDSIDE;
conn->sock = s;
@@ -2771,6 +2777,8 @@ void tcp_listen_handler(struct ctx *c, union epoll_ref ref,
if (s < 0)
goto cancel;
+ flow_initiate(flow, ref.tcp_listen.pif);
+
if (sa.sa_family == AF_INET) {
const struct in_addr *addr = &sa.sa4.sin_addr;
in_port_t port = sa.sa4.sin_port;
diff --git a/tcp_splice.c b/tcp_splice.c
index 5da7021..0e02732 100644
--- a/tcp_splice.c
+++ b/tcp_splice.c
@@ -472,6 +472,7 @@ bool tcp_splice_conn_from_sock(const struct ctx *c,
return false;
}
+ flow_forward(flow, pif1);
conn = FLOW_SET_TYPE(flow, FLOW_TCP_SPLICE, tcp_splice);
conn->flags = af == AF_INET ? 0 : SPLICE_V6;
--
@@ -472,6 +472,7 @@ bool tcp_splice_conn_from_sock(const struct ctx *c,
return false;
}
+ flow_forward(flow, pif1);
conn = FLOW_SET_TYPE(flow, FLOW_TCP_SPLICE, tcp_splice);
conn->flags = af == AF_INET ? 0 : SPLICE_V6;
--
2.45.0
next prev parent reply other threads:[~2024-05-14 1:03 UTC|newest]
Thread overview: 28+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-05-14 1:03 [PATCH v5 00/19] RFC: Unified flow table David Gibson
2024-05-14 1:03 ` [PATCH v5 01/19] flow: Clarify and enforce flow state transitions David Gibson
2024-05-16 9:30 ` Stefano Brivio
[not found] ` <ZkbVxtvmP7f0aL1S@zatzit>
2024-05-17 11:00 ` Stefano Brivio
2024-05-18 6:47 ` David Gibson
2024-05-14 1:03 ` [PATCH v5 02/19] flow: Make side 0 always be the initiating side David Gibson
2024-05-16 12:06 ` Stefano Brivio
2024-05-14 1:03 ` David Gibson [this message]
2024-05-14 1:03 ` [PATCH v5 04/19] tcp: Remove interim 'tapside' field from connection David Gibson
2024-05-14 1:03 ` [PATCH v5 05/19] flow: Common data structures for tracking flow addresses David Gibson
2024-05-14 1:03 ` [PATCH v5 06/19] flow: Populate address information for initiating side David Gibson
[not found] ` <20240516202337.1b90e5f2@elisabeth>
[not found] ` <ZkbcwkdEwjGv6uwG@zatzit>
[not found] ` <20240517215845.4d09eaae@elisabeth>
2024-05-18 7:00 ` David Gibson
2024-05-14 1:03 ` [PATCH v5 07/19] flow: Populate address information for non-initiating side David Gibson
2024-05-14 1:03 ` [PATCH v5 08/19] tcp, flow: Remove redundant information, repack connection structures David Gibson
2024-05-14 1:03 ` [PATCH v5 09/19] tcp: Obtain guest address from flowside David Gibson
2024-05-14 1:03 ` [PATCH v5 10/19] tcp: Simplify endpoint validation using flowside information David Gibson
2024-05-14 1:03 ` [PATCH v5 11/19] tcp_splice: Eliminate SPLICE_V6 flag David Gibson
2024-05-14 1:03 ` [PATCH v5 12/19] tcp, flow: Replace TCP specific hash function with general flow hash David Gibson
2024-05-14 1:03 ` [PATCH v5 13/19] flow, tcp: Generalise TCP hash table to general flow hash table David Gibson
2024-05-14 1:03 ` [PATCH v5 14/19] tcp: Re-use flow hash for initial sequence number generation David Gibson
2024-05-14 1:03 ` [PATCH v5 15/19] icmp: Use flowsides as the source of truth wherever possible David Gibson
[not found] ` <20240516225350.06aebcd7@elisabeth>
[not found] ` <ZkcAHhCpx3F0SW2K@zatzit>
[not found] ` <20240517221123.1c7197a3@elisabeth>
2024-05-18 7:08 ` David Gibson
2024-05-14 1:03 ` [PATCH v5 16/19] icmp: Look up ping flows using flow hash David Gibson
2024-05-14 1:03 ` [PATCH v5 17/19] icmp: Eliminate icmp_id_map David Gibson
2024-05-14 1:03 ` [PATCH v5 18/19] flow, tcp: Flow based NAT and port forwarding for TCP David Gibson
[not found] ` <20240518001345.2d127b09@elisabeth>
2024-05-20 5:44 ` David Gibson
2024-05-14 1:03 ` [PATCH v5 19/19] flow, icmp: Use general flow forwarding rules for ICMP David Gibson
[not found] ` <20240518001408.004011b2@elisabeth>
2024-05-20 5:56 ` David Gibson
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20240514010337.1104606-4-david@gibson.dropbear.id.au \
--to=david@gibson.dropbear.id.au \
--cc=passt-dev@passt.top \
--cc=sbrivio@redhat.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
Code repositories for project(s) associated with this public inbox
https://passt.top/passt
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for IMAP folder(s).