public inbox for passt-dev@passt.top
 help / color / mirror / code / Atom feed
From: David Gibson <david@gibson.dropbear.id.au>
To: passt-dev@passt.top, Stefano Brivio <sbrivio@redhat.com>
Cc: David Gibson <david@gibson.dropbear.id.au>
Subject: [PATCH v2 11/22] flow: Clarify flow entry life cycle, introduce uniform logging
Date: Tue,  6 Feb 2024 12:17:23 +1100	[thread overview]
Message-ID: <20240206011734.884138-12-david@gibson.dropbear.id.au> (raw)
In-Reply-To: <20240206011734.884138-1-david@gibson.dropbear.id.au>

Our allocation scheme for flow entries means there are some non-obvious
constraints on what can be done with an entry, and when.  Add a big doc
comment explaining the life cycle.

In addition, make a FLOW_START() macro to mark one of the important
transitions.  This encourages correct usage, by making it natural to only
access the flow type specific structure after calling it.  It also logs
that a new flow has been created, which is useful for debugging.

We also add logging when a flow's life cycle ends.  This doesn't need a new
helper, because it can only happen either from flow_alloc_cancel() or from
the flow deferred handler.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
---
 flow.c       | 76 ++++++++++++++++++++++++++++++++++++++++++++++++++--
 flow.h       |  5 ++++
 tcp.c        | 15 +++++------
 tcp_splice.c | 11 ++++----
 tcp_splice.h |  5 ++--
 5 files changed, 94 insertions(+), 18 deletions(-)

diff --git a/flow.c b/flow.c
index beb9749c..a155b54b 100644
--- a/flow.c
+++ b/flow.c
@@ -34,6 +34,45 @@ static_assert(ARRAY_SIZE(flow_proto) == FLOW_NUM_TYPES,
 
 /* Global Flow Table */
 
+/**
+ * DOC: Theory of Operation - flow entry life cycle
+ *
+ * An individual flow table entry moves through these logical states, usually in
+ * this order.
+ *
+ *    FREE - Part of the general pool of free flow table entries
+ *        Operations:
+ *            - flow_alloc() finds an entry and moves it to ALLOC state
+ *
+ *    ALLOC - A tentatively allocated entry
+ *        Operations:
+ *            - flow_alloc_cancel() returns the entry to FREE state
+ *            - FLOW_START() sets the entry's type and moves it to START state
+ *        Caveats:
+ *            - It's not safe to write fields in the flow entry
+ *            - It's not safe to allocate other entries with flow_alloc()
+ *            - It's not safe to return to the main epoll loop
+ *            - It's not safe to use flow_*() logging functions
+ *
+ *    START - An entry being prepared by flow type specific code
+ *        Operations:
+ *            - Flow type specific fields may be accessed
+ *            - flow_*() logging functions
+ *            - flow_alloc_cancel() returns the entry to FREE state
+ *        Caveats:
+ *            - Returning to the main epoll loop or allocating another entry
+ *              with flow_alloc() implicitly moves the entry to ACTIVE state.
+ *
+ *    ACTIVE - An active flow entry managed by flow type specific code
+ *        Operations:
+ *            - Flow type specific fields may be accessed
+ *            - flow_*() logging functions
+ *            - Flow may be expired by returning 'true' from flow type specific
+ *              deferred or timer handler.  This will return it to FREE state.
+ *        Caveats:
+ *            - It's not safe to call flow_alloc_cancel()
+ */
+
 /**
  * DOC: Theory of Operation - allocating and freeing flow entries
  *
@@ -109,6 +148,39 @@ void flow_log_(const struct flow_common *f, int pri, const char *fmt, ...)
 	logmsg(pri, "Flow %u (%s): %s", flow_idx(f), FLOW_TYPE(f), msg);
 }
 
+/**
+ * flow_start() - Set flow type for new flow and log
+ * @flow:	Flow to set type for
+ * @type:	Type for new flow
+ * @iniside:	Which side initiated the new flow
+ *
+ * Return: @flow
+ *
+ * Should be called before setting any flow type specific fields in the flow
+ * table entry.
+ */
+union flow *flow_start(union flow *flow, enum flow_type type,
+		       unsigned iniside)
+{
+	(void)iniside;
+	flow->f.type = type;
+	flow_dbg(flow, "START %s", flow_type_str[flow->f.type]);
+	return flow;
+}
+
+/**
+ * flow_end() - Clear flow type for finished flow and log
+ * @flow:	Flow to clear
+ */
+static void flow_end(union flow *flow)
+{
+	if (flow->f.type == FLOW_TYPE_NONE)
+		return; /* Nothing to do */
+
+	flow_dbg(flow, "END %s", flow_type_str[flow->f.type]);
+	flow->f.type = FLOW_TYPE_NONE;
+}
+
 /**
  * flow_alloc() - Allocate a new flow
  *
@@ -157,7 +229,7 @@ void flow_alloc_cancel(union flow *flow)
 {
 	ASSERT(flow_first_free > FLOW_IDX(flow));
 
-	flow->f.type = FLOW_TYPE_NONE;
+	flow_end(flow);
 	/* Put it back in a length 1 free cluster, don't attempt to fully
 	 * reverse flow_alloc()s steps.  This will get folded together the next
 	 * time flow_defer_handler runs anyway() */
@@ -227,7 +299,7 @@ void flow_defer_handler(const struct ctx *c, const struct timespec *now)
 		}
 
 		if (closed) {
-			flow->f.type = FLOW_TYPE_NONE;
+			flow_end(flow);
 
 			if (free_head) {
 				/* Add slot to current free cluster */
diff --git a/flow.h b/flow.h
index e9b3ce3e..8b66751b 100644
--- a/flow.h
+++ b/flow.h
@@ -45,6 +45,11 @@ struct flow_common {
 #define FLOW_TABLE_PRESSURE		30	/* % of FLOW_MAX */
 #define FLOW_FILE_PRESSURE		30	/* % of c->nofile */
 
+union flow *flow_start(union flow *flow, enum flow_type type,
+		       unsigned iniside);
+#define FLOW_START(flow_, t_, var_, i_)		\
+	(&flow_start((flow_), (t_), (i_))->var_)
+
 /**
  * struct flow_sidx - ID for one side of a specific flow
  * @side:	Side referenced (0 or 1)
diff --git a/tcp.c b/tcp.c
index 3722dc09..e15b932f 100644
--- a/tcp.c
+++ b/tcp.c
@@ -1952,8 +1952,7 @@ static void tcp_conn_from_tap(struct ctx *c, sa_family_t af,
 			goto cancel;
 	}
 
-	conn = &flow->tcp;
-	conn->f.type = FLOW_TCP;
+	conn = FLOW_START(flow, FLOW_TCP, tcp, TAPSIDE);
 	conn->sock = s;
 	conn->timer = -1;
 	conn_event(c, conn, TAP_SYN_RCVD);
@@ -2658,18 +2657,19 @@ static void tcp_snat_inbound(const struct ctx *c, union inany_addr *addr)
  * tcp_tap_conn_from_sock() - Initialize state for non-spliced connection
  * @c:		Execution context
  * @ref:	epoll reference of listening socket
- * @conn:	connection structure to initialize
+ * @flow:	flow to initialise
  * @s:		Accepted socket
  * @sa:		Peer socket address (from accept())
  * @now:	Current timestamp
  */
 static void tcp_tap_conn_from_sock(struct ctx *c,
 				   union tcp_listen_epoll_ref ref,
-				   struct tcp_tap_conn *conn, int s,
+				   union flow *flow, int s,
 				   const union sockaddr_inany *sa,
 				   const struct timespec *now)
 {
-	conn->f.type = FLOW_TCP;
+	struct tcp_tap_conn *conn = FLOW_START(flow, FLOW_TCP, tcp, SOCKSIDE);
+
 	conn->sock = s;
 	conn->timer = -1;
 	conn->ws_to_tap = conn->ws_from_tap = 0;
@@ -2715,11 +2715,10 @@ void tcp_listen_handler(struct ctx *c, union epoll_ref ref,
 		goto cancel;
 
 	if (c->mode == MODE_PASTA &&
-	    tcp_splice_conn_from_sock(c, ref.tcp_listen, &flow->tcp_splice,
-				      s, &sa))
+	    tcp_splice_conn_from_sock(c, ref.tcp_listen, flow, s, &sa))
 		return;
 
-	tcp_tap_conn_from_sock(c, ref.tcp_listen, &flow->tcp, s, &sa, now);
+	tcp_tap_conn_from_sock(c, ref.tcp_listen, flow, s, &sa, now);
 	return;
 
 cancel:
diff --git a/tcp_splice.c b/tcp_splice.c
index 180a9ea7..576fe9be 100644
--- a/tcp_splice.c
+++ b/tcp_splice.c
@@ -424,7 +424,7 @@ static int tcp_splice_new(const struct ctx *c, struct tcp_splice_conn *conn,
  * tcp_splice_conn_from_sock() - Attempt to init state for a spliced connection
  * @c:		Execution context
  * @ref:	epoll reference of listening socket
- * @conn:	connection structure to initialize
+ * @flow:	flow to initialise
  * @s:		Accepted socket
  * @sa:		Peer address of connection
  *
@@ -432,10 +432,10 @@ static int tcp_splice_new(const struct ctx *c, struct tcp_splice_conn *conn,
  * #syscalls:pasta setsockopt
  */
 bool tcp_splice_conn_from_sock(const struct ctx *c,
-			       union tcp_listen_epoll_ref ref,
-			       struct tcp_splice_conn *conn, int s,
-			       const union sockaddr_inany *sa)
+			       union tcp_listen_epoll_ref ref, union flow *flow,
+			       int s, const union sockaddr_inany *sa)
 {
+	struct tcp_splice_conn *conn;
 	union inany_addr aany;
 	in_port_t port;
 
@@ -445,7 +445,8 @@ bool tcp_splice_conn_from_sock(const struct ctx *c,
 	if (!inany_is_loopback(&aany))
 		return false;
 
-	conn->f.type = FLOW_TCP_SPLICE;
+	conn = FLOW_START(flow, FLOW_TCP_SPLICE, tcp_splice, 0);
+
 	conn->flags = inany_v4(&aany) ? 0 : SPLICE_V6;
 	conn->s[0] = s;
 	conn->s[1] = -1;
diff --git a/tcp_splice.h b/tcp_splice.h
index 20f41b39..5a471af0 100644
--- a/tcp_splice.h
+++ b/tcp_splice.h
@@ -12,9 +12,8 @@ union sockaddr_inany;
 void tcp_splice_sock_handler(struct ctx *c, union epoll_ref ref,
 			     uint32_t events);
 bool tcp_splice_conn_from_sock(const struct ctx *c,
-			       union tcp_listen_epoll_ref ref,
-			       struct tcp_splice_conn *conn, int s,
-			       const union sockaddr_inany *sa);
+			       union tcp_listen_epoll_ref ref, union flow *flow,
+			       int s, const union sockaddr_inany *sa);
 void tcp_splice_init(struct ctx *c);
 
 #endif /* TCP_SPLICE_H */
-- 
@@ -12,9 +12,8 @@ union sockaddr_inany;
 void tcp_splice_sock_handler(struct ctx *c, union epoll_ref ref,
 			     uint32_t events);
 bool tcp_splice_conn_from_sock(const struct ctx *c,
-			       union tcp_listen_epoll_ref ref,
-			       struct tcp_splice_conn *conn, int s,
-			       const union sockaddr_inany *sa);
+			       union tcp_listen_epoll_ref ref, union flow *flow,
+			       int s, const union sockaddr_inany *sa);
 void tcp_splice_init(struct ctx *c);
 
 #endif /* TCP_SPLICE_H */
-- 
2.43.0


  parent reply	other threads:[~2024-02-06  1:17 UTC|newest]

Thread overview: 38+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-02-06  1:17 [PATCH v2 00/22] More flow table preliminaries: address handling improvements David Gibson
2024-02-06  1:17 ` [PATCH v2 01/22] treewide: Use sa_family_t for address family variables David Gibson
2024-02-06  1:17 ` [PATCH v2 02/22] inany: Helper to test for various address types David Gibson
2024-02-18 20:58   ` Stefano Brivio
2024-02-19  1:48     ` David Gibson
2024-02-06  1:17 ` [PATCH v2 03/22] inany: Add inany_ntop() helper David Gibson
2024-02-06  1:17 ` [PATCH v2 04/22] inany: Provide more conveniently typed constants for special addresses David Gibson
2024-02-06  1:17 ` [PATCH v2 05/22] inany: Introduce union sockaddr_inany David Gibson
2024-02-06  1:17 ` [PATCH v2 06/22] util: Allow IN4_IS_* macros to operate on untyped addresses David Gibson
2024-02-06  1:17 ` [PATCH v2 07/22] tcp, udp: Don't precompute port remappings in epoll references David Gibson
2024-02-06  1:17 ` [PATCH v2 08/22] flow: Add helper to determine a flow's protocol David Gibson
2024-02-06  1:17 ` [PATCH v2 09/22] tcp_splice: Simplify clean up logic David Gibson
2024-02-18 20:59   ` Stefano Brivio
2024-02-19  1:50     ` David Gibson
2024-02-06  1:17 ` [PATCH v2 10/22] tcp_splice: Don't use flow_trace() before setting flow type David Gibson
2024-02-06  1:17 ` David Gibson [this message]
2024-02-18 21:00   ` [PATCH v2 11/22] flow: Clarify flow entry life cycle, introduce uniform logging Stefano Brivio
2024-02-19  1:58     ` David Gibson
2024-02-06  1:17 ` [PATCH v2 12/22] tcp, tcp_splice: Helpers for getting sockets from the pools David Gibson
2024-02-18 21:00   ` Stefano Brivio
2024-02-19  1:51     ` David Gibson
2024-02-06  1:17 ` [PATCH v2 13/22] tcp_splice: More specific variable names in new splice path David Gibson
2024-02-18 21:00   ` Stefano Brivio
2024-02-19  1:53     ` David Gibson
2024-02-06  1:17 ` [PATCH v2 14/22] tcp_splice: Merge tcp_splice_new() into its caller David Gibson
2024-02-06  1:17 ` [PATCH v2 15/22] tcp_splice: Make tcp_splice_connect() create its own sockets David Gibson
2024-02-06  1:17 ` [PATCH v2 16/22] tcp_splice: Improve error reporting on connect path David Gibson
2024-02-18 21:01   ` Stefano Brivio
2024-02-19  3:23     ` David Gibson
2024-02-06  1:17 ` [PATCH v2 17/22] tcp_splice: Improve logic deciding when to splice David Gibson
2024-02-06  1:17 ` [PATCH v2 18/22] tcp, tcp_splice: Parse listening socket epoll ref in tcp_listen_handler() David Gibson
2024-02-06  1:17 ` [PATCH v2 19/22] tcp: Validate TCP endpoint addresses David Gibson
2024-02-22 12:45   ` Stefano Brivio
2024-02-23  3:56     ` David Gibson
2024-02-06  1:17 ` [PATCH v2 20/22] tap: Disallow loopback addresses on tap interface David Gibson
2024-02-06  1:17 ` [PATCH v2 21/22] port_fwd: Fix copypasta error in port_fwd_scan_udp() comments David Gibson
2024-02-06  1:17 ` [PATCH v2 22/22] fwd: Rename port_fwd.[ch] and their contents David Gibson
2024-02-27 14:22 ` [PATCH v2 00/22] More flow table preliminaries: address handling improvements Stefano Brivio

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20240206011734.884138-12-david@gibson.dropbear.id.au \
    --to=david@gibson.dropbear.id.au \
    --cc=passt-dev@passt.top \
    --cc=sbrivio@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.
Code repositories for project(s) associated with this public inbox

	https://passt.top/passt

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for IMAP folder(s).