From: Stefano Brivio <sbrivio@redhat.com>
To: passt-dev@passt.top
Cc: David Gibson <david@gibson.dropbear.id.au>
Subject: [PATCH 2/3] tcp, tcp_splice: Fix port remapping for inbound, spliced connections
Date: Tue, 11 Oct 2022 01:33:49 +0200 [thread overview]
Message-ID: <20221010233350.1198630-3-sbrivio@redhat.com> (raw)
In-Reply-To: <20221010233350.1198630-1-sbrivio@redhat.com>
It's indeed convenient to use the final destination port in the epoll
reference data, so that we don't have to check the configured port
deltas before establishing connections. However, this doesn't work if
we need to access the original port information to know what to do
once we receive a connection.
The existing implementation resulted in inbound, spliced connections
with a remapped destination port (and only those) to be attempted on
the outbound side, as we would check the wrong position in port
bitmaps to establish whether to use inbound or outbound sockets.
For listening spliced sockets, set the port index in the epoll
reference to the original port. Once we get a connection, use the
port delta arrays to find the final destination port, and the
original destination port to decide what socket pool we should use.
This avoids the need for a reverse mapping table as implemented for
UDP.
Fixes: 33482d5bf293 ("passt: Add PASTA mode, major rework")
Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
---
tcp.c | 19 ++++++++-----------
tcp_splice.c | 18 ++++++++++++++----
2 files changed, 22 insertions(+), 15 deletions(-)
diff --git a/tcp.c b/tcp.c
index 63153b6..cc08353 100644
--- a/tcp.c
+++ b/tcp.c
@@ -3084,15 +3084,16 @@ void tcp_sock_handler(struct ctx *c, union epoll_ref ref, uint32_t events,
void tcp_sock_init(const struct ctx *c, int ns, sa_family_t af,
const void *addr, const char *ifname, in_port_t port)
{
+ union tcp_epoll_ref tref_spliced = { .tcp.listen = 1, .tcp.splice = 1 };
union tcp_epoll_ref tref = { .tcp.listen = 1 };
const void *bind_addr;
int s;
- if (ns) {
+ if (ns)
tref.tcp.index = (in_port_t)(port + c->tcp.fwd_out.delta[port]);
- } else {
+ else
tref.tcp.index = (in_port_t)(port + c->tcp.fwd_in.delta[port]);
- }
+ tref_spliced.tcp.index = (in_port_t) port;
if (af == AF_INET || af == AF_UNSPEC) {
if (!addr && c->mode == MODE_PASTA)
@@ -3100,8 +3101,7 @@ void tcp_sock_init(const struct ctx *c, int ns, sa_family_t af,
else
bind_addr = addr;
- tref.tcp.v6 = 0;
- tref.tcp.splice = 0;
+ tref.tcp.v6 = tref_spliced.tcp.v6 = 0;
if (!ns) {
s = sock_l4(c, AF_INET, IPPROTO_TCP, bind_addr, ifname,
@@ -3118,9 +3118,8 @@ void tcp_sock_init(const struct ctx *c, int ns, sa_family_t af,
if (c->mode == MODE_PASTA) {
bind_addr = &(uint32_t){ htonl(INADDR_LOOPBACK) };
- tref.tcp.splice = 1;
s = sock_l4(c, AF_INET, IPPROTO_TCP, bind_addr, ifname,
- port, tref.u32);
+ port, tref_spliced.u32);
if (s >= 0)
tcp_sock_set_bufsize(c, s);
else
@@ -3141,9 +3140,8 @@ void tcp_sock_init(const struct ctx *c, int ns, sa_family_t af,
else
bind_addr = addr;
- tref.tcp.v6 = 1;
+ tref.tcp.v6 = tref_spliced.tcp.v6 = 1;
- tref.tcp.splice = 0;
if (!ns) {
s = sock_l4(c, AF_INET6, IPPROTO_TCP, bind_addr, ifname,
port, tref.u32);
@@ -3159,9 +3157,8 @@ void tcp_sock_init(const struct ctx *c, int ns, sa_family_t af,
if (c->mode == MODE_PASTA) {
bind_addr = &in6addr_loopback;
- tref.tcp.splice = 1;
s = sock_l4(c, AF_INET6, IPPROTO_TCP, bind_addr, ifname,
- port, tref.u32);
+ port, tref_spliced.u32);
if (s >= 0)
tcp_sock_set_bufsize(c, s);
else
diff --git a/tcp_splice.c b/tcp_splice.c
index 96c31c8..bf5f62c 100644
--- a/tcp_splice.c
+++ b/tcp_splice.c
@@ -35,6 +35,7 @@
#include <fcntl.h>
#include <limits.h>
#include <stdint.h>
+#include <stdbool.h>
#include <string.h>
#include <time.h>
#include <unistd.h>
@@ -512,13 +513,18 @@ static int tcp_splice_connect_ns(void *arg)
static int tcp_splice_new(const struct ctx *c, struct tcp_splice_conn *conn,
in_port_t port)
{
- struct tcp_splice_connect_ns_arg ns_arg = { c, conn, port, 0 };
int *p, i, s = -1;
+ bool inbound;
- if (bitmap_isset(c->tcp.fwd_in.map, port))
+ if (bitmap_isset(c->tcp.fwd_in.map, port)) {
p = CONN_V6(conn) ? ns_sock_pool6 : ns_sock_pool4;
- else
+ port += c->tcp.fwd_in.delta[port];
+ inbound = true;
+ } else {
p = CONN_V6(conn) ? init_sock_pool6 : init_sock_pool4;
+ port += c->tcp.fwd_out.delta[port];
+ inbound = false;
+ }
for (i = 0; i < TCP_SOCK_POOL_SIZE; i++, p++) {
SWAP(s, *p);
@@ -526,11 +532,15 @@ static int tcp_splice_new(const struct ctx *c, struct tcp_splice_conn *conn,
break;
}
- if (s < 0 && bitmap_isset(c->tcp.fwd_in.map, port)) {
+ /* No socket available in namespace: create a new one for connect() */
+ if (s < 0 && inbound) {
+ struct tcp_splice_connect_ns_arg ns_arg = { c, conn, port, 0 };
+
NS_CALL(tcp_splice_connect_ns, &ns_arg);
return ns_arg.ret;
}
+ /* Otherwise, the socket will connect on the side it was created on */
return tcp_splice_connect(c, conn, s, port);
}
--
@@ -35,6 +35,7 @@
#include <fcntl.h>
#include <limits.h>
#include <stdint.h>
+#include <stdbool.h>
#include <string.h>
#include <time.h>
#include <unistd.h>
@@ -512,13 +513,18 @@ static int tcp_splice_connect_ns(void *arg)
static int tcp_splice_new(const struct ctx *c, struct tcp_splice_conn *conn,
in_port_t port)
{
- struct tcp_splice_connect_ns_arg ns_arg = { c, conn, port, 0 };
int *p, i, s = -1;
+ bool inbound;
- if (bitmap_isset(c->tcp.fwd_in.map, port))
+ if (bitmap_isset(c->tcp.fwd_in.map, port)) {
p = CONN_V6(conn) ? ns_sock_pool6 : ns_sock_pool4;
- else
+ port += c->tcp.fwd_in.delta[port];
+ inbound = true;
+ } else {
p = CONN_V6(conn) ? init_sock_pool6 : init_sock_pool4;
+ port += c->tcp.fwd_out.delta[port];
+ inbound = false;
+ }
for (i = 0; i < TCP_SOCK_POOL_SIZE; i++, p++) {
SWAP(s, *p);
@@ -526,11 +532,15 @@ static int tcp_splice_new(const struct ctx *c, struct tcp_splice_conn *conn,
break;
}
- if (s < 0 && bitmap_isset(c->tcp.fwd_in.map, port)) {
+ /* No socket available in namespace: create a new one for connect() */
+ if (s < 0 && inbound) {
+ struct tcp_splice_connect_ns_arg ns_arg = { c, conn, port, 0 };
+
NS_CALL(tcp_splice_connect_ns, &ns_arg);
return ns_arg.ret;
}
+ /* Otherwise, the socket will connect on the side it was created on */
return tcp_splice_connect(c, conn, s, port);
}
--
2.35.1
next prev parent reply other threads:[~2022-10-10 23:33 UTC|newest]
Thread overview: 7+ messages / expand[flat|nested] mbox.gz Atom feed top
2022-10-10 23:33 [PATCH 0/3] Fixes for spliced connections Stefano Brivio
2022-10-10 23:33 ` [PATCH 1/3] tcp, tcp_splice: Adjust comments to current meaning of inbound and outbound Stefano Brivio
2022-10-11 0:57 ` David Gibson
2022-10-10 23:33 ` Stefano Brivio [this message]
2022-10-11 1:19 ` [PATCH 2/3] tcp, tcp_splice: Fix port remapping for inbound, spliced connections David Gibson
2022-10-11 7:42 ` Stefano Brivio
2022-10-10 23:33 ` [PATCH 3/3] tcp: Don't create 'tap' socket for ports that are bound to loopback only Stefano Brivio
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20221010233350.1198630-3-sbrivio@redhat.com \
--to=sbrivio@redhat.com \
--cc=david@gibson.dropbear.id.au \
--cc=passt-dev@passt.top \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
Code repositories for project(s) associated with this public inbox
https://passt.top/passt
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for IMAP folder(s).