From: Stefano Brivio <sbrivio@redhat.com>
To: passt-dev@passt.top
Cc: David Gibson <david@gibson.dropbear.id.au>
Subject: [PATCH v2 2/3] tcp, tcp_splice: Fix port remapping for inbound, spliced connections
Date: Wed, 12 Oct 2022 17:45:53 +0200 [thread overview]
Message-ID: <20221012154554.1650667-3-sbrivio@redhat.com> (raw)
In-Reply-To: <20221012154554.1650667-1-sbrivio@redhat.com>
In pasta mode, when we receive a new inbound connection, we need to
select a socket that was created in the namespace to proceed and
connect() it to its final destination.
The existing condition might pick a wrong socket, though, if the
destination port is remapped, because we'll check the bitmap of
inbound ports using the remapped port (stored in the epoll reference)
as index, and not the original port.
Instead of using the port bitmap for this purpose, store this
information in the epoll reference itself, by adding a new 'outbound'
bit, that's set if the listening socket was created the namespace,
and unset otherwise.
Then, use this bit to pick a socket on the right side.
Suggested-by: David Gibson <david@gibson.dropbear.id.au>
Fixes: 33482d5bf293 ("passt: Add PASTA mode, major rework")
Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
---
v2:
- use a bit in epoll reference to indicate whether a socket is for
inbound or outbound connections, instead of trying (and failing)
to rebuild that information from maps (David Gibson)
tcp.c | 7 +++----
tcp.h | 2 ++
tcp_splice.c | 20 +++++++++++++-------
3 files changed, 18 insertions(+), 11 deletions(-)
diff --git a/tcp.c b/tcp.c
index 63153b6..0d4ce57 100644
--- a/tcp.c
+++ b/tcp.c
@@ -3084,15 +3084,14 @@ void tcp_sock_handler(struct ctx *c, union epoll_ref ref, uint32_t events,
void tcp_sock_init(const struct ctx *c, int ns, sa_family_t af,
const void *addr, const char *ifname, in_port_t port)
{
- union tcp_epoll_ref tref = { .tcp.listen = 1 };
+ union tcp_epoll_ref tref = { .tcp.listen = 1, .tcp.outbound = ns };
const void *bind_addr;
int s;
- if (ns) {
+ if (ns)
tref.tcp.index = (in_port_t)(port + c->tcp.fwd_out.delta[port]);
- } else {
+ else
tref.tcp.index = (in_port_t)(port + c->tcp.fwd_in.delta[port]);
- }
if (af == AF_INET || af == AF_UNSPEC) {
if (!addr && c->mode == MODE_PASTA)
diff --git a/tcp.h b/tcp.h
index 7ba7ab7..72e7815 100644
--- a/tcp.h
+++ b/tcp.h
@@ -34,6 +34,7 @@ void tcp_update_l2_buf(const unsigned char *eth_d, const unsigned char *eth_s,
* union tcp_epoll_ref - epoll reference portion for TCP connections
* @listen: Set if this file descriptor is a listening socket
* @splice: Set if descriptor is associated to a spliced connection
+ * @outbound: Listening socket maps to outbound, spliced connection
* @v6: Set for IPv6 sockets or connections
* @timer: Reference is a timerfd descriptor for connection
* @index: Index of connection in table, or port for bound sockets
@@ -43,6 +44,7 @@ union tcp_epoll_ref {
struct {
uint32_t listen:1,
splice:1,
+ outbound:1,
v6:1,
timer:1,
index:20;
diff --git a/tcp_splice.c b/tcp_splice.c
index 96c31c8..99c5fa7 100644
--- a/tcp_splice.c
+++ b/tcp_splice.c
@@ -35,6 +35,7 @@
#include <fcntl.h>
#include <limits.h>
#include <stdint.h>
+#include <stdbool.h>
#include <string.h>
#include <time.h>
#include <unistd.h>
@@ -506,19 +507,19 @@ static int tcp_splice_connect_ns(void *arg)
* @c: Execution context
* @conn: Connection pointer
* @port: Destination port, host order
+ * @outbound: Connection request coming from namespace
*
* Return: return code from connect()
*/
static int tcp_splice_new(const struct ctx *c, struct tcp_splice_conn *conn,
- in_port_t port)
+ in_port_t port, int outbound)
{
- struct tcp_splice_connect_ns_arg ns_arg = { c, conn, port, 0 };
int *p, i, s = -1;
- if (bitmap_isset(c->tcp.fwd_in.map, port))
- p = CONN_V6(conn) ? ns_sock_pool6 : ns_sock_pool4;
- else
+ if (outbound)
p = CONN_V6(conn) ? init_sock_pool6 : init_sock_pool4;
+ else
+ p = CONN_V6(conn) ? ns_sock_pool6 : ns_sock_pool4;
for (i = 0; i < TCP_SOCK_POOL_SIZE; i++, p++) {
SWAP(s, *p);
@@ -526,11 +527,15 @@ static int tcp_splice_new(const struct ctx *c, struct tcp_splice_conn *conn,
break;
}
- if (s < 0 && bitmap_isset(c->tcp.fwd_in.map, port)) {
+ /* No socket available in namespace: create a new one for connect() */
+ if (s < 0 && !outbound) {
+ struct tcp_splice_connect_ns_arg ns_arg = { c, conn, port, 0 };
+
NS_CALL(tcp_splice_connect_ns, &ns_arg);
return ns_arg.ret;
}
+ /* Otherwise, the socket will connect on the side it was created on */
return tcp_splice_connect(c, conn, s, port);
}
@@ -592,7 +597,8 @@ void tcp_sock_handler_splice(struct ctx *c, union epoll_ref ref,
conn->a = s;
conn->flags = ref.r.p.tcp.tcp.v6 ? SOCK_V6 : 0;
- if (tcp_splice_new(c, conn, ref.r.p.tcp.tcp.index))
+ if (tcp_splice_new(c, conn, ref.r.p.tcp.tcp.index,
+ ref.r.p.tcp.tcp.outbound))
conn_flag(c, conn, CLOSING);
return;
--
@@ -35,6 +35,7 @@
#include <fcntl.h>
#include <limits.h>
#include <stdint.h>
+#include <stdbool.h>
#include <string.h>
#include <time.h>
#include <unistd.h>
@@ -506,19 +507,19 @@ static int tcp_splice_connect_ns(void *arg)
* @c: Execution context
* @conn: Connection pointer
* @port: Destination port, host order
+ * @outbound: Connection request coming from namespace
*
* Return: return code from connect()
*/
static int tcp_splice_new(const struct ctx *c, struct tcp_splice_conn *conn,
- in_port_t port)
+ in_port_t port, int outbound)
{
- struct tcp_splice_connect_ns_arg ns_arg = { c, conn, port, 0 };
int *p, i, s = -1;
- if (bitmap_isset(c->tcp.fwd_in.map, port))
- p = CONN_V6(conn) ? ns_sock_pool6 : ns_sock_pool4;
- else
+ if (outbound)
p = CONN_V6(conn) ? init_sock_pool6 : init_sock_pool4;
+ else
+ p = CONN_V6(conn) ? ns_sock_pool6 : ns_sock_pool4;
for (i = 0; i < TCP_SOCK_POOL_SIZE; i++, p++) {
SWAP(s, *p);
@@ -526,11 +527,15 @@ static int tcp_splice_new(const struct ctx *c, struct tcp_splice_conn *conn,
break;
}
- if (s < 0 && bitmap_isset(c->tcp.fwd_in.map, port)) {
+ /* No socket available in namespace: create a new one for connect() */
+ if (s < 0 && !outbound) {
+ struct tcp_splice_connect_ns_arg ns_arg = { c, conn, port, 0 };
+
NS_CALL(tcp_splice_connect_ns, &ns_arg);
return ns_arg.ret;
}
+ /* Otherwise, the socket will connect on the side it was created on */
return tcp_splice_connect(c, conn, s, port);
}
@@ -592,7 +597,8 @@ void tcp_sock_handler_splice(struct ctx *c, union epoll_ref ref,
conn->a = s;
conn->flags = ref.r.p.tcp.tcp.v6 ? SOCK_V6 : 0;
- if (tcp_splice_new(c, conn, ref.r.p.tcp.tcp.index))
+ if (tcp_splice_new(c, conn, ref.r.p.tcp.tcp.index,
+ ref.r.p.tcp.tcp.outbound))
conn_flag(c, conn, CLOSING);
return;
--
2.35.1
next prev parent reply other threads:[~2022-10-12 15:45 UTC|newest]
Thread overview: 5+ messages / expand[flat|nested] mbox.gz Atom feed top
2022-10-12 15:45 [PATCH v2 0/3] Fixes for spliced connections Stefano Brivio
2022-10-12 15:45 ` [PATCH v2 1/3] tcp, tcp_splice: Adjust comments to current meaning of inbound and outbound Stefano Brivio
2022-10-12 15:45 ` Stefano Brivio [this message]
2022-10-13 1:19 ` [PATCH v2 2/3] tcp, tcp_splice: Fix port remapping for inbound, spliced connections David Gibson
2022-10-12 15:45 ` [PATCH v2 3/3] tcp: Don't create 'tap' socket for ports that are bound to loopback only Stefano Brivio
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20221012154554.1650667-3-sbrivio@redhat.com \
--to=sbrivio@redhat.com \
--cc=david@gibson.dropbear.id.au \
--cc=passt-dev@passt.top \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
Code repositories for project(s) associated with this public inbox
https://passt.top/passt
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for IMAP folder(s).