From mboxrd@z Thu Jan 1 00:00:00 1970 Authentication-Results: passt.top; dmarc=none (p=none dis=none) header.from=gibson.dropbear.id.au Authentication-Results: passt.top; dkim=pass (2048-bit key; secure) header.d=gibson.dropbear.id.au header.i=@gibson.dropbear.id.au header.a=rsa-sha256 header.s=202602 header.b=kPf60E9V; dkim-atps=neutral Received: from mail.ozlabs.org (gandalf.ozlabs.org [150.107.74.76]) by passt.top (Postfix) with ESMTPS id DC0B65A0269 for ; Thu, 21 May 2026 08:37:51 +0200 (CEST) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gibson.dropbear.id.au; s=202602; t=1779345468; bh=q8T/MMQesLGBFOv4QfUfAynL+tm7sp7fd9aApyPeEgk=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=kPf60E9VbKMBe4LbolTCiiSRpjj9e4BeXwZfrQUrwUWnFEwylNTycTfAK4l3YEcZp KybPS5HZ6ViTMLGU2eHmXHenvWJiI+dN7u2cN7POWpekBIQq+A6WJeOS//LRYN3cG+ uqwX5AodI5YUPq+PCIqNQ1HqVjG5jr38IOgZmL7EEG7495lHKQWyWtrtRluL0aBL/Q 4z7FgBKWT//SdbTve7rBhWyRTmWOzjM2put5r+v2J5GVqxe9j0o9ktgR4bySbDV6jJ fST08FjR96bdDCK1tFCQikxERlSS+IfzUzZM3N7vbZ2d1CS4veNtKH46S9ct0pvZcZ qcCssMun+Mgzg== Received: by gandalf.ozlabs.org (Postfix, from userid 1007) id 4gLdy44cpVz4wKv; Thu, 21 May 2026 16:37:48 +1000 (AEST) From: David Gibson To: passt-dev@passt.top, Stefano Brivio Subject: [PATCH v2 3/4] tcp_splice: Clean up flow control path for splice forwarding Date: Thu, 21 May 2026 16:37:44 +1000 Message-ID: <20260521063745.1211215-4-david@gibson.dropbear.id.au> X-Mailer: git-send-email 2.54.0 In-Reply-To: <20260521063745.1211215-1-david@gibson.dropbear.id.au> References: <20260521063745.1211215-1-david@gibson.dropbear.id.au> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit Message-ID-Hash: 5QAZXLAARIROQQ7WHS2HMT6ZRVI36BEM X-Message-ID-Hash: 5QAZXLAARIROQQ7WHS2HMT6ZRVI36BEM X-MailFrom: dgibson@gandalf.ozlabs.org X-Mailman-Rule-Misses: dmarc-mitigation; no-senders; approved; emergency; loop; banned-address; member-moderation; nonmember-moderation; administrivia; implicit-dest; max-recipients; max-size; news-moderation; 
no-subject; digests; suspicious-header CC: Paul Holzinger , David Gibson X-Mailman-Version: 3.3.8 Precedence: list List-Id: Development discussion and patches for passt Archived-At: Archived-At: List-Archive: List-Archive: List-Help: List-Owner: List-Post: List-Subscribe: List-Unsubscribe: Splice forwarding can be blocked either waiting for data from one side or waiting for space on the other. For that reason, tcp_splice_sock_handler() on either socket can forward data in either or both directions, depending on whether we have EPOLLIN, EPOLLOUT or both events. The control flow for this is quite hard to follow though, since we forward in one direction, then sometimes loop back with a goto to do it in the other direction. Simplify this by adding a tcp_splice_forward() function with the logic to forward in one direction and calling it either once or twice from tcp_splice_sock_handler(). Signed-off-by: David Gibson --- tcp_splice.c | 148 +++++++++++++++++++++++++++------------------------ 1 file changed, 78 insertions(+), 70 deletions(-) diff --git a/tcp_splice.c b/tcp_splice.c index 762a058e..ae92bbd9 100644 --- a/tcp_splice.c +++ b/tcp_splice.c @@ -474,72 +474,24 @@ void tcp_splice_conn_from_sock(const struct ctx *c, union flow *flow, int s0) } /** - * tcp_splice_sock_handler() - Handler for socket mapped to spliced connection + * tcp_splice_forward() - Forward data in one direction using splice() * @c: Execution context - * @ref: epoll reference - * @events: epoll events bitmap + * @conn: Connection to forward data for + * @fromsidei: Side to forward data from * @now: Current timestamp * + * Return: 0 on success, -1 on error (connection should be reset) + * * #syscalls:pasta splice */ -void tcp_splice_sock_handler(struct ctx *c, union epoll_ref ref, - uint32_t events, const struct timespec *now) +static int tcp_splice_forward(struct ctx *c, + struct tcp_splice_conn *conn, unsigned fromsidei, + const struct timespec *now) { - struct tcp_splice_conn *conn =
conn_at_sidx(ref.flowside); - unsigned evsidei = ref.flowside.sidei, fromsidei; - uint8_t lowat_set_flag, lowat_act_flag; - int eof, never_read; - - assert(conn->f.type == FLOW_TCP_SPLICE); - - if (conn->events == SPLICE_CLOSED) - return; - - if (events & EPOLLERR) { - int err, rc; - socklen_t sl = sizeof(err); - - rc = getsockopt(ref.fd, SOL_SOCKET, SO_ERROR, &err, &sl); - if (rc) { - flow_perror(conn, "Error retrieving SO_ERROR"); - } else { - flow_dbg_ratelimit(conn, now, - "Error event on %s socket: %s", - pif_name(conn->f.pif[evsidei]), - strerror_(err)); - } - goto reset; - } - - if (conn->events == SPLICE_CONNECT) { - if (!(events & EPOLLOUT)) { - flow_err_ratelimit( - conn, now, - "Unexpected events 0x%x during connect", - events); - goto reset; - } - if (tcp_splice_connect_finish(c, conn)) - goto reset; - } - - if (events & EPOLLOUT) { - fromsidei = !evsidei; - conn_event(conn, ~OUT_WAIT(evsidei)); - } else { - fromsidei = evsidei; - } - - if (events & EPOLLRDHUP) - /* For side 0 this is fake, but implied */ - conn_event(conn, FIN_RCVD(evsidei)); - -swap: - eof = 0; - never_read = 1; - - lowat_set_flag = RCVLOWAT_SET(fromsidei); - lowat_act_flag = RCVLOWAT_ACT(fromsidei); + uint8_t lowat_set_flag = RCVLOWAT_SET(fromsidei); + uint8_t lowat_act_flag = RCVLOWAT_ACT(fromsidei); + int never_read = 1; + int eof = 0; while (1) { ssize_t readlen, written, pending; @@ -557,7 +509,7 @@ retry: flow_perror_ratelimit( conn, now, "Splicing from %s socket", pif_name(conn->f.pif[fromsidei])); - goto reset; + return -1; } flow_trace(conn, "%zi from read-side call", readlen); @@ -585,7 +537,7 @@ retry: flow_perror_ratelimit( conn, now, "Splicing to %s socket", pif_name(conn->f.pif[!fromsidei])); - goto reset; + return -1; } flow_trace(conn, "%zi from write-side call (passed %zi)", @@ -647,24 +599,80 @@ retry: flow_perror_ratelimit( conn, now, "shutdown() on %s", pif_name(conn->f.pif[!sidei])); - goto reset; + return -1; } conn_event(conn, FIN_SENT(!sidei)); } } } - if 
(CONN_HAS(conn, FIN_SENT(0) | FIN_SENT(1))) { - /* Clean close, no reset */ - conn_flag(conn, CLOSING); + return 0; +} + +/** + * tcp_splice_sock_handler() - Handler for socket mapped to spliced connection + * @c: Execution context + * @ref: epoll reference + * @events: epoll events bitmap + * @now: Current timestamp + */ +void tcp_splice_sock_handler(struct ctx *c, union epoll_ref ref, + uint32_t events, const struct timespec *now) +{ + struct tcp_splice_conn *conn = conn_at_sidx(ref.flowside); + unsigned evsidei = ref.flowside.sidei; + + assert(conn->f.type == FLOW_TCP_SPLICE); + + if (conn->events == SPLICE_CLOSED) return; + + if (events & EPOLLERR) { + int err, rc; + socklen_t sl = sizeof(err); + + rc = getsockopt(ref.fd, SOL_SOCKET, SO_ERROR, &err, &sl); + if (rc) + flow_perror(conn, "Error retrieving SO_ERROR"); + else + flow_dbg_ratelimit(conn, now, + "Error event on %s socket: %s", + pif_name(conn->f.pif[evsidei]), + strerror_(err)); + goto reset; } - if ((events & (EPOLLIN | EPOLLOUT)) == (EPOLLIN | EPOLLOUT)) { - events = EPOLLIN; + if (conn->events == SPLICE_CONNECT) { + if (!(events & EPOLLOUT)) { + flow_err_ratelimit( + conn, now, + "Unexpected events 0x%x during connect", + events); + goto reset; + } + if (tcp_splice_connect_finish(c, conn)) + goto reset; + } - fromsidei = !fromsidei; - goto swap; + if (events & EPOLLRDHUP) + /* For side 0 this is fake, but implied */ + conn_event(conn, FIN_RCVD(evsidei)); + + if (events & EPOLLOUT) { + if (tcp_splice_forward(c, conn, !evsidei, now)) + goto reset; + conn_event(conn, ~OUT_WAIT(evsidei)); + } + + if (events & EPOLLIN) { + if (tcp_splice_forward(c, conn, evsidei, now)) + goto reset; + } + + if (CONN_HAS(conn, FIN_SENT(0) | FIN_SENT(1))) { + /* Clean close, no reset */ + conn_flag(conn, CLOSING); + return; } if (events & EPOLLHUP) { -- 2.54.0