public inbox for passt-dev@passt.top
 help / color / mirror / code / Atom feed
From: David Gibson <david@gibson.dropbear.id.au>
To: passt-dev@passt.top, Stefano Brivio <sbrivio@redhat.com>
Cc: David Gibson <david@gibson.dropbear.id.au>
Subject: [PATCH v3 12/16] flow,tcp: Use epoll_ref type including flow and side
Date: Thu, 30 Nov 2023 13:02:18 +1100	[thread overview]
Message-ID: <20231130020222.4056647-13-david@gibson.dropbear.id.au> (raw)
In-Reply-To: <20231130020222.4056647-1-david@gibson.dropbear.id.au>

Currently TCP uses the 'flow' epoll_ref field for both connected
sockets and timers, which consists of just the index of the relevant
flow (connection).

This is just fine for timers, for while it obviously works, it's
subtly incomplete for sockets on spliced connections.  In that case we
want to know which side of the connection the event is occurring on as
well as which connection.  At present, we deduce that information by
looking at the actual fd, and comparing it to the fds of the sockets
on each side.

When we use the flow table for more things, we expect more cases where
something will need to know a specific side of a specific flow for an
event, but nothing more.

Therefore add a new 'flowside' epoll_ref field, with exactly that
information.  We use it for TCP connected sockets.  This allows us to
directly know the side for spliced connections.  For "tap"
connections, it's pretty meaningless, since the side is always the
socket side.  It still makes logical sense though, and it may become
important for future flow table work.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
---
 flow.h       |  2 +-
 passt.h      |  2 ++
 tcp.c        | 11 ++++++++---
 tcp_splice.c | 37 ++++++++++++-------------------------
 tcp_splice.h |  2 +-
 5 files changed, 24 insertions(+), 30 deletions(-)

diff --git a/flow.h b/flow.h
index 4f12831..c2a5190 100644
--- a/flow.h
+++ b/flow.h
@@ -45,7 +45,7 @@ struct flow_common {
  * @flow:	Index of flow referenced
  */
 typedef struct flow_sidx {
-	int		side :1;
+	unsigned	side :1;
 	unsigned	flow :FLOW_INDEX_BITS;
 } flow_sidx_t;
 static_assert(sizeof(flow_sidx_t) <= sizeof(uint32_t),
diff --git a/passt.h b/passt.h
index 66a819f..33b493f 100644
--- a/passt.h
+++ b/passt.h
@@ -37,6 +37,7 @@ union epoll_ref;
 
 #include "pif.h"
 #include "packet.h"
+#include "flow.h"
 #include "icmp.h"
 #include "port_fwd.h"
 #include "tcp.h"
@@ -91,6 +92,7 @@ union epoll_ref {
 		int32_t		fd:FD_REF_BITS;
 		union {
 			uint32_t flow;
+			flow_sidx_t flowside;
 			union tcp_listen_epoll_ref tcp_listen;
 			union udp_epoll_ref udp;
 			union icmp_epoll_ref icmp;
diff --git a/tcp.c b/tcp.c
index e80a6aa..24aba44 100644
--- a/tcp.c
+++ b/tcp.c
@@ -304,6 +304,10 @@
 #include "tcp_conn.h"
 #include "flow_table.h"
 
+/* Sides of a flow as we use them in "tap" connections */
+#define	SOCKSIDE	0
+#define	TAPSIDE		1
+
 #define TCP_FRAMES_MEM			128
 #define TCP_FRAMES							\
 	(c->mode == MODE_PASST ? TCP_FRAMES_MEM : 1)
@@ -639,7 +643,7 @@ static int tcp_epoll_ctl(const struct ctx *c, struct tcp_tap_conn *conn)
 {
 	int m = conn->in_epoll ? EPOLL_CTL_MOD : EPOLL_CTL_ADD;
 	union epoll_ref ref = { .type = EPOLL_TYPE_TCP, .fd = conn->sock,
-				.flow = FLOW_IDX(conn) };
+				.flowside = FLOW_SIDX(conn, SOCKSIDE) };
 	struct epoll_event ev = { .data.u64 = ref.u64 };
 
 	if (conn->events == CLOSED) {
@@ -2874,14 +2878,15 @@ static void tcp_tap_sock_handler(struct ctx *c, struct tcp_tap_conn *conn,
  */
 void tcp_sock_handler(struct ctx *c, union epoll_ref ref, uint32_t events)
 {
-	union flow *flow = FLOW(ref.flow);
+	union flow *flow = FLOW(ref.flowside.flow);
 
 	switch (flow->f.type) {
 	case FLOW_TCP:
 		tcp_tap_sock_handler(c, &flow->tcp, events);
 		break;
 	case FLOW_TCP_SPLICE:
-		tcp_splice_sock_handler(c, &flow->tcp_splice, ref.fd, events);
+		tcp_splice_sock_handler(c, &flow->tcp_splice, ref.flowside.side,
+					events);
 		break;
 	default:
 		die("Unexpected %s in tcp_sock_handler_compact()",
diff --git a/tcp_splice.c b/tcp_splice.c
index 9cec9c6..d5c351b 100644
--- a/tcp_splice.c
+++ b/tcp_splice.c
@@ -128,8 +128,10 @@ static int tcp_splice_epoll_ctl(const struct ctx *c,
 {
 	int m = conn->in_epoll ? EPOLL_CTL_MOD : EPOLL_CTL_ADD;
 	union epoll_ref ref[SIDES] = {
-		{ .type = EPOLL_TYPE_TCP, .fd = conn->s[0], .flow = FLOW_IDX(conn) },
-		{ .type = EPOLL_TYPE_TCP, .fd = conn->s[1], .flow = FLOW_IDX(conn) }
+		{ .type = EPOLL_TYPE_TCP, .fd = conn->s[0],
+		  .flowside = FLOW_SIDX(conn, 0) },
+		{ .type = EPOLL_TYPE_TCP, .fd = conn->s[1],
+		  .flowside = FLOW_SIDX(conn, 1) }
 	};
 	struct epoll_event ev[SIDES] = { { .data.u64 = ref[0].u64 },
 					 { .data.u64 = ref[1].u64 } };
@@ -481,13 +483,13 @@ bool tcp_splice_conn_from_sock(const struct ctx *c,
  * tcp_splice_sock_handler() - Handler for socket mapped to spliced connection
  * @c:		Execution context
  * @conn:	Connection state
- * @s:		Socket fd on which an event has occurred
+ * @side:	Side of the connection on which an event has occurred
  * @events:	epoll events bitmap
  *
  * #syscalls:pasta splice
  */
 void tcp_splice_sock_handler(struct ctx *c, struct tcp_splice_conn *conn,
-			     int s, uint32_t events)
+			     int side, uint32_t events)
 {
 	uint8_t lowat_set_flag, lowat_act_flag;
 	int eof, never_read;
@@ -507,30 +509,15 @@ void tcp_splice_sock_handler(struct ctx *c, struct tcp_splice_conn *conn,
 	}
 
 	if (events & EPOLLOUT) {
-		if (s == conn->s[0]) {
-			conn_event(c, conn, ~OUT_WAIT_0);
-			fromside = 1;
-		} else {
-			conn_event(c, conn, ~OUT_WAIT_1);
-			fromside = 0;
-		}
+		fromside = !side;
+		conn_event(c, conn, side == 0 ? ~OUT_WAIT_0 : ~OUT_WAIT_1);
 	} else {
-		fromside = s == conn->s[0] ? 0 : 1;
-	}
-
-	if (events & EPOLLRDHUP) {
-		if (s == conn->s[0])
-			conn_event(c, conn, FIN_RCVD_0);
-		else
-			conn_event(c, conn, FIN_RCVD_1);
+		fromside = side;
 	}
 
-	if (events & EPOLLHUP) {
-		if (s == conn->s[0])
-			conn_event(c, conn, FIN_SENT_0); /* Fake, but implied */
-		else
-			conn_event(c, conn, FIN_SENT_1);
-	}
+	if (events & EPOLLRDHUP)
+		/* For side 0 this is fake, but implied */
+		conn_event(c, conn, side == 0 ? FIN_RCVD_0 : FIN_RCVD_1);
 
 swap:
 	eof = 0;
diff --git a/tcp_splice.h b/tcp_splice.h
index dc486f1..aa85c7c 100644
--- a/tcp_splice.h
+++ b/tcp_splice.h
@@ -9,7 +9,7 @@
 struct tcp_splice_conn;
 
 void tcp_splice_sock_handler(struct ctx *c, struct tcp_splice_conn *conn,
-			     int s, uint32_t events);
+			     int side, uint32_t events);
 bool tcp_splice_conn_from_sock(const struct ctx *c,
 			       union tcp_listen_epoll_ref ref,
 			       struct tcp_splice_conn *conn, int s,
-- 
@@ -9,7 +9,7 @@
 struct tcp_splice_conn;
 
 void tcp_splice_sock_handler(struct ctx *c, struct tcp_splice_conn *conn,
-			     int s, uint32_t events);
+			     int side, uint32_t events);
 bool tcp_splice_conn_from_sock(const struct ctx *c,
 			       union tcp_listen_epoll_ref ref,
 			       struct tcp_splice_conn *conn, int s,
-- 
2.43.0


  parent reply	other threads:[~2023-11-30  2:02 UTC|newest]

Thread overview: 25+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-11-30  2:02 [PATCH v3 00/16] Introduce unified flow table, first steps David Gibson
2023-11-30  2:02 ` [PATCH v3 01/16] treewide: Add messages to static_assert() calls David Gibson
2023-11-30  2:02 ` [PATCH v3 02/16] flow, tcp: Generalise connection types David Gibson
2023-11-30  2:02 ` [PATCH v3 03/16] flow, tcp: Move TCP connection table to unified flow table David Gibson
2023-11-30  2:02 ` [PATCH v3 04/16] flow, tcp: Consolidate flow pointer<->index helpers David Gibson
2023-11-30  2:02 ` [PATCH v3 05/16] util: MAX_FROM_BITS() should be unsigned David Gibson
2023-11-30  2:02 ` [PATCH v3 06/16] flow: Make unified version of flow table compaction David Gibson
2023-11-30  2:02 ` [PATCH v3 07/16] flow, tcp: Add logging helpers for connection related messages David Gibson
2023-11-30  2:02 ` [PATCH v3 08/16] flow: Introduce 'sidx' type to represent one side of one flow David Gibson
2023-12-02  4:35   ` Stefano Brivio
2023-11-30  2:02 ` [PATCH v3 09/16] tcp: Remove unneccessary bounds check in tcp_timer_handler() David Gibson
2023-11-30  2:02 ` [PATCH v3 10/16] flow,tcp: Generalise TCP epoll_ref to generic flows David Gibson
2023-11-30  2:02 ` [PATCH v3 11/16] tcp_splice: Use unsigned to represent side David Gibson
2023-11-30  2:02 ` David Gibson [this message]
2023-11-30  2:02 ` [PATCH v3 13/16] test: Avoid hitting guestfish command length limits David Gibson
2023-11-30  2:02 ` [PATCH v3 14/16] pif: Add helpers to get the name of a pif David Gibson
2023-11-30 12:45   ` Stefano Brivio
2023-12-01  0:08     ` David Gibson
2023-11-30  2:02 ` [PATCH v3 15/16] tcp: "TCP" hash secret doesn't need to be TCP specific David Gibson
2023-11-30  2:02 ` [PATCH v3 16/16] tcp: Don't defer hash table removal David Gibson
2023-11-30 12:45   ` Stefano Brivio
2023-12-01  0:07     ` David Gibson
2023-12-02  4:34       ` Stefano Brivio
2023-12-04  0:43         ` David Gibson
2023-12-04  9:54 ` [PATCH v3 00/16] Introduce unified flow table, first steps Stefano Brivio

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20231130020222.4056647-13-david@gibson.dropbear.id.au \
    --to=david@gibson.dropbear.id.au \
    --cc=passt-dev@passt.top \
    --cc=sbrivio@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
Code repositories for project(s) associated with this public inbox

	https://passt.top/passt

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for IMAP folder(s).