public inbox for passt-dev@passt.top
 help / color / mirror / code / Atom feed
From: David Gibson <david@gibson.dropbear.id.au>
To: passt-dev@passt.top, Stefano Brivio <sbrivio@redhat.com>
Cc: Paul Holzinger <pholzing@redhat.com>,
	David Gibson <david@gibson.dropbear.id.au>
Subject: [PATCH 3/6] tcp_splice: Clean up flow control path for splice forwarding
Date: Wed, 20 May 2026 23:08:48 +1000	[thread overview]
Message-ID: <20260520130851.436931-4-david@gibson.dropbear.id.au> (raw)
In-Reply-To: <20260520130851.436931-1-david@gibson.dropbear.id.au>

Splice forwarding can be blocked either waiting for data from one side
or waiting for space on the other.  For that reason,
tcp_splice_sock_handler() on either socket can forward data in either or
both directions, depending on whether we have EPOLLIN, EPOLLOUT or both
events.

The control flow for this is quite hard to follow though, since we forward
in one direction, then sometimes loop back with a goto to do it in the
other direction.  Simplify this by adding a tcp_splice_forward() function
with the logic to forward in one direction and calling it either once or
twice from tcp_splice_sock_handler().

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
---
 tcp_splice.c | 137 ++++++++++++++++++++++++++-------------------------
 1 file changed, 71 insertions(+), 66 deletions(-)

diff --git a/tcp_splice.c b/tcp_splice.c
index 34ffea73..18e8b303 100644
--- a/tcp_splice.c
+++ b/tcp_splice.c
@@ -474,67 +474,20 @@ void tcp_splice_conn_from_sock(const struct ctx *c, union flow *flow, int s0)
 }
 
 /**
- * tcp_splice_sock_handler() - Handler for socket mapped to spliced connection
+ * tcp_splice_forward() - Forward data in one direction using splice()
  * @c:		Execution context
- * @ref:	epoll reference
- * @events:	epoll events bitmap
+ * @conn:	Connection to forward data for
+ * @fromsidei:	Side to forward data from
  *
  * #syscalls:pasta splice
  */
-void tcp_splice_sock_handler(struct ctx *c, union epoll_ref ref,
-			     uint32_t events)
+static int tcp_splice_forward(struct ctx *c, struct
+			      tcp_splice_conn *conn, unsigned fromsidei)
 {
-	struct tcp_splice_conn *conn = conn_at_sidx(ref.flowside);
-	unsigned evsidei = ref.flowside.sidei, fromsidei;
-	uint8_t lowat_set_flag, lowat_act_flag;
-	int eof, never_read;
-
-	assert(conn->f.type == FLOW_TCP_SPLICE);
-
-	if (conn->events == SPLICE_CLOSED)
-		return;
-
-	if (events & EPOLLERR) {
-		int err, rc;
-		socklen_t sl = sizeof(err);
-
-		rc = getsockopt(ref.fd, SOL_SOCKET, SO_ERROR, &err, &sl);
-		if (rc)
-			flow_perror(conn, "Error retrieving SO_ERROR");
-		else
-			flow_dbg(conn, "Error event on %s socket: %s",
-				 pif_name(conn->f.pif[evsidei]),
-				 strerror_(err));
-		goto reset;
-	}
-
-	if (conn->events == SPLICE_CONNECT) {
-		if (!(events & EPOLLOUT)) {
-			flow_err(conn, "Unexpected events 0x%x during connect",
-				 events);
-			goto reset;
-		}
-		if (tcp_splice_connect_finish(c, conn))
-			goto reset;
-	}
-
-	if (events & EPOLLOUT) {
-		fromsidei = !evsidei;
-		conn_event(conn, ~OUT_WAIT(evsidei));
-	} else {
-		fromsidei = evsidei;
-	}
-
-	if (events & EPOLLRDHUP)
-		/* For side 0 this is fake, but implied */
-		conn_event(conn, FIN_RCVD(evsidei));
-
-swap:
-	eof = 0;
-	never_read = 1;
-
-	lowat_set_flag = RCVLOWAT_SET(fromsidei);
-	lowat_act_flag = RCVLOWAT_ACT(fromsidei);
+	uint8_t lowat_set_flag = RCVLOWAT_SET(fromsidei);
+	uint8_t lowat_act_flag = RCVLOWAT_ACT(fromsidei);
+	int never_read = 1;
+	int eof = 0;
 
 	while (1) {
 		ssize_t readlen, written, pending;
@@ -551,7 +504,7 @@ retry:
 		if (readlen < 0 && errno != EAGAIN) {
 			flow_perror(conn, "Splicing from %s socket",
 				    pif_name(conn->f.pif[fromsidei]));
-			goto reset;
+			return -1;
 		}
 
 		flow_trace(conn, "%zi from read-side call", readlen);
@@ -578,7 +531,7 @@ retry:
 		if (written < 0 && errno != EAGAIN) {
 			flow_perror(conn, "Splicing to %s socket",
 				    pif_name(conn->f.pif[!fromsidei]));
-			goto reset;
+			return -1;
 		}
 
 		flow_trace(conn, "%zi from write-side call (passed %zi)",
@@ -639,24 +592,76 @@ retry:
 				if (shutdown(conn->s[!sidei], SHUT_WR) < 0) {
 					flow_perror(conn, "shutdown() on %s",
 						    pif_name(conn->f.pif[!sidei]));
-					goto reset;
+					return -1;
 				}
 				conn_event(conn, FIN_SENT(!sidei));
 			}
 		}
 	}
 
-	if (CONN_HAS(conn, FIN_SENT(0) | FIN_SENT(1))) {
-		/* Clean close, no reset */
-		conn_flag(conn, CLOSING);
+	return 0;
+}
+
+/**
+ * tcp_splice_sock_handler() - Handler for socket mapped to spliced connection
+ * @c:		Execution context
+ * @ref:	epoll reference
+ * @events:	epoll events bitmap
+ */
+void tcp_splice_sock_handler(struct ctx *c, union epoll_ref ref,
+			     uint32_t events)
+{
+	struct tcp_splice_conn *conn = conn_at_sidx(ref.flowside);
+	unsigned evsidei = ref.flowside.sidei;
+
+	assert(conn->f.type == FLOW_TCP_SPLICE);
+
+	if (conn->events == SPLICE_CLOSED)
 		return;
+
+	if (events & EPOLLERR) {
+		int err, rc;
+		socklen_t sl = sizeof(err);
+
+		rc = getsockopt(ref.fd, SOL_SOCKET, SO_ERROR, &err, &sl);
+		if (rc)
+			flow_perror(conn, "Error retrieving SO_ERROR");
+		else
+			flow_dbg(conn, "Error event on %s socket: %s",
+				 pif_name(conn->f.pif[evsidei]),
+				 strerror_(err));
+		goto reset;
+	}
+
+	if (conn->events == SPLICE_CONNECT) {
+		if (!(events & EPOLLOUT)) {
+			flow_err(conn, "Unexpected events 0x%x during connect",
+				 events);
+			goto reset;
+		}
+		if (tcp_splice_connect_finish(c, conn))
+			goto reset;
+	}
+
+	if (events & EPOLLRDHUP)
+		/* For side 0 this is fake, but implied */
+		conn_event(conn, FIN_RCVD(evsidei));
+
+	if (events & EPOLLOUT) {
+		if (tcp_splice_forward(c, conn, !evsidei))
+			goto reset;
+		conn_event(conn, ~OUT_WAIT(evsidei));
 	}
 
-	if ((events & (EPOLLIN | EPOLLOUT)) == (EPOLLIN | EPOLLOUT)) {
-		events = EPOLLIN;
+	if (events & EPOLLIN) {
+		if (tcp_splice_forward(c, conn, evsidei))
+		    goto reset;
+	}
 
-		fromsidei = !fromsidei;
-		goto swap;
+	if (CONN_HAS(conn, FIN_SENT(0) | FIN_SENT(1))) {
+		/* Clean close, no reset */
+		conn_flag(conn, CLOSING);
+		return;
 	}
 
 	if (events & EPOLLHUP) {
-- 
2.54.0


  parent reply	other threads:[~2026-05-20 13:08 UTC|newest]

Thread overview: 8+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-05-20 13:08 [PATCH 0/6] Fix race condition while closing spliced connections David Gibson
2026-05-20 13:08 ` [PATCH 1/6] tcp_splice: Improve error reporting David Gibson
2026-05-20 14:31   ` Stefano Brivio
2026-05-20 13:08 ` [PATCH 2/6] tcp_splice: Avoid missing EOF recognition while forwarding David Gibson
2026-05-20 13:08 ` David Gibson [this message]
2026-05-20 13:08 ` [PATCH 4/6] tcp_splice: Simplify tracking of read/written bytes David Gibson
2026-05-20 13:08 ` [PATCH 5/6] tcp_splice: Simplify EPOLLRDHUP / eof / FIN handling David Gibson
2026-05-20 13:08 ` [PATCH 6/6] tcp_splice: Simplify shutdown(2) handling David Gibson

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20260520130851.436931-4-david@gibson.dropbear.id.au \
    --to=david@gibson.dropbear.id.au \
    --cc=passt-dev@passt.top \
    --cc=pholzing@redhat.com \
    --cc=sbrivio@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link.

Be sure your reply has a Subject: header at the top and a blank line before the message body.
Code repositories for project(s) associated with this public inbox

	https://passt.top/passt

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for IMAP folder(s).