From mboxrd@z Thu Jan 1 00:00:00 1970 Received: from gandalf.ozlabs.org (gandalf.ozlabs.org [150.107.74.76]) by passt.top (Postfix) with ESMTPS id A85885A0272 for ; Mon, 28 Aug 2023 07:42:06 +0200 (CEST) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gibson.dropbear.id.au; s=201602; t=1693201322; bh=jN+j1sCn18kcnHaDbmZ6436koFkLriLRsPn6oC8YaWI=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=HGpLJvWf4AGCbCeW16OiY48YennSZe9T+Tcm4Qa7kibDXYcwOwCKvr5w1xzweNbIR VkaVvXXc7EkmLCV1U0VgMUzqcHDVgDM8GjVYKXby1J5CstflFr4dg7bRIRLv86uGmJ eHAehsL+Guj7iT0Sm/JTXDoyjkO9ss3ZeXYgW8jw= Received: by gandalf.ozlabs.org (Postfix, from userid 1007) id 4RYzvt5Wdqz4wxx; Mon, 28 Aug 2023 15:42:02 +1000 (AEST) From: David Gibson To: passt-dev@passt.top, Stefano Brivio Subject: [PATCH v2 02/10] flow, tcp: Move TCP connection table to unified flow table Date: Mon, 28 Aug 2023 15:41:38 +1000 Message-ID: <20230828054146.48673-3-david@gibson.dropbear.id.au> X-Mailer: git-send-email 2.41.0 In-Reply-To: <20230828054146.48673-1-david@gibson.dropbear.id.au> References: <20230828054146.48673-1-david@gibson.dropbear.id.au> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit Message-ID-Hash: 2YFYRWIGAY7K4UORAEVL2WCVCLWZPLBZ X-Message-ID-Hash: 2YFYRWIGAY7K4UORAEVL2WCVCLWZPLBZ X-MailFrom: dgibson@gandalf.ozlabs.org X-Mailman-Rule-Misses: dmarc-mitigation; no-senders; approved; emergency; loop; banned-address; member-moderation; nonmember-moderation; administrivia; implicit-dest; max-recipients; max-size; news-moderation; no-subject; digests; suspicious-header CC: David Gibson X-Mailman-Version: 3.3.8 Precedence: list List-Id: Development discussion and patches for passt Archived-At: Archived-At: List-Archive: List-Archive: List-Help: List-Owner: List-Post: List-Subscribe: List-Unsubscribe: We want to generalise "connection" tracking to things other than true TCP connections. Continue implementing this by renaming the TCP connection table to the "flow table" and moving it to flow.c. 
The definitions are split between flow.h and flow_table.h - we need this separation to avoid circular dependencies: the definitions in flow.h will be needed by many headers using the flow mechanism, but flow_table.h needs all those protocol specific headers in order to define the full flow table entry. Signed-off-by: David Gibson --- Makefile | 8 ++--- flow.c | 11 +++++++ flow.h | 8 +++++ flow_table.h | 25 +++++++++++++++ passt.h | 3 ++ tcp.c | 87 +++++++++++++++++++++++++--------------------------- tcp.h | 5 --- tcp_conn.h | 23 +++----------- tcp_splice.c | 19 ++++++------ 9 files changed, 107 insertions(+), 82 deletions(-) create mode 100644 flow_table.h diff --git a/Makefile b/Makefile index c5a3ce7..73c0ef7 100644 --- a/Makefile +++ b/Makefile @@ -51,10 +51,10 @@ SRCS = $(PASST_SRCS) $(QRAP_SRCS) MANPAGES = passt.1 pasta.1 qrap.1 -PASST_HEADERS = arch.h arp.h checksum.h conf.h dhcp.h dhcpv6.h flow.h icmp.h \ - inany.h isolation.h lineread.h log.h ndp.h netlink.h packet.h passt.h \ - pasta.h pcap.h port_fwd.h siphash.h tap.h tcp.h tcp_conn.h \ - tcp_splice.h udp.h util.h +PASST_HEADERS = arch.h arp.h checksum.h conf.h dhcp.h dhcpv6.h flow.h \ + flow_table.h icmp.h inany.h isolation.h lineread.h log.h ndp.h \ + netlink.h packet.h passt.h pasta.h pcap.h port_fwd.h siphash.h tap.h \ + tcp.h tcp_conn.h tcp_splice.h udp.h util.h HEADERS = $(PASST_HEADERS) seccomp.h C := \#include \nstruct tcp_info x = { .tcpi_snd_wnd = 0 }; diff --git a/flow.c b/flow.c index c3802ce..864158a 100644 --- a/flow.c +++ b/flow.c @@ -5,10 +5,21 @@ * Tracking for logical "flows" of packets. 
*/ +#include +#include + +#include "util.h" +#include "passt.h" +#include "inany.h" #include "flow.h" +#include "tcp_conn.h" +#include "flow_table.h" const char *flow_type_str[] = { [FLOW_NONE] = "", [FLOW_TCP] = "TCP connection", [FLOW_TCP_SPLICE] = "TCP connection (spliced)", }; + +/* Global Flow Table */ +union flow flowtab[FLOW_MAX]; diff --git a/flow.h b/flow.h index 1afc1e5..ce497cf 100644 --- a/flow.h +++ b/flow.h @@ -26,4 +26,12 @@ struct flow_common { enum flow_type type; }; +#define FLOW_INDEX_BITS 17 /* 128k - 1 */ +#define FLOW_MAX MAX_FROM_BITS(FLOW_INDEX_BITS) + +#define FLOW_TABLE_PRESSURE 30 /* % of FLOW_MAX */ +#define FLOW_FILE_PRESSURE 30 /* % of c->nofile */ + +union flow; + #endif /* FLOW_H */ diff --git a/flow_table.h b/flow_table.h new file mode 100644 index 0000000..c4c646b --- /dev/null +++ b/flow_table.h @@ -0,0 +1,25 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later + * Copyright Red Hat + * Author: David Gibson + * + * Definitions for the global table of packet flows. + */ +#ifndef FLOW_TABLE_H +#define FLOW_TABLE_H + +/** + * union flow - Descriptor for a logical packet flow (e.g. connection) + * @f: Fields common between all variants + * @tcp: Fields for non-spliced TCP connections + * @tcp_splice: Fields for spliced TCP connections +*/ +union flow { + struct flow_common f; + struct tcp_tap_conn tcp; + struct tcp_splice_conn tcp_splice; +}; + +/* Global Flow Table */ +extern union flow flowtab[]; + +#endif /* FLOW_TABLE_H */ diff --git a/passt.h b/passt.h index 282bd1a..023b7e0 100644 --- a/passt.h +++ b/passt.h @@ -220,6 +220,7 @@ struct ip6_ctx { * @pasta_conf_ns: Configure namespace after creating it * @no_copy_routes: Don't copy all routes when configuring target namespace * @no_copy_addrs: Don't copy all addresses when configuring namespace + * @flow_count: Number of tracked packet flows (connections etc.) 
* @no_tcp: Disable TCP operation * @tcp: Context for TCP protocol handler * @no_tcp: Disable UDP operation @@ -281,6 +282,8 @@ struct ctx { int no_copy_routes; int no_copy_addrs; + unsigned flow_count; + int no_tcp; struct tcp_ctx tcp; int no_udp; diff --git a/tcp.c b/tcp.c index 75930b1..7994197 100644 --- a/tcp.c +++ b/tcp.c @@ -305,14 +305,14 @@ #include "flow.h" #include "tcp_conn.h" +#include "flow_table.h" #define TCP_FRAMES_MEM 128 #define TCP_FRAMES \ (c->mode == MODE_PASST ? TCP_FRAMES_MEM : 1) #define TCP_HASH_TABLE_LOAD 70 /* % */ -#define TCP_HASH_TABLE_SIZE (TCP_MAX_CONNS * 100 / \ - TCP_HASH_TABLE_LOAD) +#define TCP_HASH_TABLE_SIZE (FLOW_MAX * 100 / TCP_HASH_TABLE_LOAD) #define MAX_WS 8 #define MAX_WINDOW (1 << (16 + (MAX_WS))) @@ -561,11 +561,8 @@ tcp6_l2_flags_buf[TCP_FRAMES_MEM]; static unsigned int tcp6_l2_flags_buf_used; -/* TCP connections */ -union tcp_conn tc[TCP_MAX_CONNS]; - -#define CONN(index) (&tc[(index)].tap) -#define CONN_IDX(conn) ((union tcp_conn *)(conn) - tc) +#define CONN(index) (&flowtab[(index)].tcp) +#define CONN_IDX(conn) ((union flow *)(conn) - flowtab) /** conn_at_idx() - Find a connection by index, if present * @index: Index of connection to lookup @@ -574,7 +571,7 @@ union tcp_conn tc[TCP_MAX_CONNS]; */ static inline struct tcp_tap_conn *conn_at_idx(int index) { - if ((index < 0) || (index >= TCP_MAX_CONNS)) + if ((index < 0) || (index >= FLOW_MAX)) return NULL; ASSERT(CONN(index)->f.type == FLOW_TCP); return CONN(index); @@ -1300,26 +1297,26 @@ static struct tcp_tap_conn *tcp_hash_lookup(const struct ctx *c, * @c: Execution context * @hole: Pointer to recently closed connection */ -void tcp_table_compact(struct ctx *c, union tcp_conn *hole) +void tcp_table_compact(struct ctx *c, union flow *hole) { - union tcp_conn *from; + union flow *from; - if (CONN_IDX(hole) == --c->tcp.conn_count) { + if (CONN_IDX(hole) == --c->flow_count) { debug("TCP: table compaction: maximum index was %li (%p)", CONN_IDX(hole), hole); 
memset(hole, 0, sizeof(*hole)); return; } - from = tc + c->tcp.conn_count; + from = flowtab + c->flow_count; memcpy(hole, from, sizeof(*hole)); switch (from->f.type) { case FLOW_TCP: - tcp_tap_conn_update(c, &from->tap, &hole->tap); + tcp_tap_conn_update(c, &from->tcp, &hole->tcp); break; case FLOW_TCP_SPLICE: - tcp_splice_conn_update(c, &hole->splice); + tcp_splice_conn_update(c, &hole->tcp_splice); break; default: die("Unexpected %s in tcp_table_compact()", @@ -1336,18 +1333,18 @@ void tcp_table_compact(struct ctx *c, union tcp_conn *hole) /** * tcp_conn_destroy() - Close sockets, trigger hash table removal and compaction * @c: Execution context - * @conn_union: Connection pointer (container union) + * @flow: Flow table entry for this connection */ -static void tcp_conn_destroy(struct ctx *c, union tcp_conn *conn_union) +static void tcp_conn_destroy(struct ctx *c, union flow *flow) { - struct tcp_tap_conn *conn = &conn_union->tap; + struct tcp_tap_conn *conn = &flow->tcp; close(conn->sock); if (conn->timer != -1) close(conn->timer); tcp_hash_remove(c, conn); - tcp_table_compact(c, conn_union); + tcp_table_compact(c, flow); } static void tcp_rst_do(struct ctx *c, struct tcp_tap_conn *conn); @@ -1390,24 +1387,24 @@ static void tcp_l2_data_buf_flush(struct ctx *c) */ void tcp_defer_handler(struct ctx *c) { - union tcp_conn *conn; + union flow *flow; tcp_l2_flags_buf_flush(c); tcp_l2_data_buf_flush(c); - for (conn = tc + c->tcp.conn_count - 1; conn >= tc; conn--) { - switch (conn->f.type) { + for (flow = flowtab + c->flow_count - 1; flow >= flowtab; flow--) { + switch (flow->f.type) { case FLOW_TCP: - if (conn->tap.events == CLOSED) - tcp_conn_destroy(c, conn); + if (flow->tcp.events == CLOSED) + tcp_conn_destroy(c, flow); break; case FLOW_TCP_SPLICE: - if (conn->splice.flags & CLOSING) - tcp_splice_destroy(c, conn); + if (flow->tcp_splice.flags & CLOSING) + tcp_splice_destroy(c, flow); break; default: die("Unexpected %s in tcp_defer_handler()", - 
FLOW_TYPE(&conn->f)); + FLOW_TYPE(&flow->f)); } } } @@ -2016,7 +2013,7 @@ static void tcp_conn_from_tap(struct ctx *c, (void)saddr; - if (c->tcp.conn_count >= TCP_MAX_CONNS) + if (c->flow_count >= FLOW_MAX) return; if ((s = tcp_conn_pool_sock(pool)) < 0) @@ -2042,7 +2039,7 @@ static void tcp_conn_from_tap(struct ctx *c, } } - conn = CONN(c->tcp.conn_count++); + conn = CONN(c->flow_count++); conn->f.type = FLOW_TCP; conn->sock = s; conn->timer = -1; @@ -2762,11 +2759,11 @@ void tcp_listen_handler(struct ctx *c, union epoll_ref ref, const struct timespec *now) { struct sockaddr_storage sa; - union tcp_conn *conn; + union flow *flow; socklen_t sl; int s; - if (c->no_tcp || c->tcp.conn_count >= TCP_MAX_CONNS) + if (c->no_tcp || c->flow_count >= FLOW_MAX) return; sl = sizeof(sa); @@ -2779,14 +2776,14 @@ void tcp_listen_handler(struct ctx *c, union epoll_ref ref, if (s < 0) return; - conn = tc + c->tcp.conn_count++; + flow = flowtab + c->flow_count++; if (c->mode == MODE_PASTA && - tcp_splice_conn_from_sock(c, ref.tcp_listen, &conn->splice, + tcp_splice_conn_from_sock(c, ref.tcp_listen, &flow->tcp_splice, s, (struct sockaddr *)&sa)) return; - tcp_tap_conn_from_sock(c, ref.tcp_listen, &conn->tap, s, + tcp_tap_conn_from_sock(c, ref.tcp_listen, &flow->tcp, s, (struct sockaddr *)&sa, now); } @@ -2915,18 +2912,18 @@ static void tcp_tap_sock_handler(struct ctx *c, struct tcp_tap_conn *conn, */ void tcp_sock_handler(struct ctx *c, union epoll_ref ref, uint32_t events) { - union tcp_conn *conn = tc + ref.tcp.index; + union flow *flow = flowtab + ref.tcp.index; - switch (conn->f.type) { + switch (flow->f.type) { case FLOW_TCP: - tcp_tap_sock_handler(c, &conn->tap, events); + tcp_tap_sock_handler(c, &flow->tcp, events); break; case FLOW_TCP_SPLICE: - tcp_splice_sock_handler(c, &conn->splice, ref.fd, events); + tcp_splice_sock_handler(c, &flow->tcp_splice, ref.fd, events); break; default: die("Unexpected %s in tcp_sock_handler_compact()", - FLOW_TYPE(&conn->f)); + 
FLOW_TYPE(&flow->f)); } } @@ -3291,7 +3288,7 @@ static int tcp_port_rebind(void *arg) */ void tcp_timer(struct ctx *c, const struct timespec *ts) { - union tcp_conn *conn; + union flow *flow; (void)ts; @@ -3314,18 +3311,18 @@ void tcp_timer(struct ctx *c, const struct timespec *ts) } } - for (conn = tc + c->tcp.conn_count - 1; conn >= tc; conn--) { - switch (conn->f.type) { + for (flow = flowtab + c->flow_count - 1; flow >= flowtab; flow--) { + switch (flow->f.type) { case FLOW_TCP: - if (conn->tap.events == CLOSED) - tcp_conn_destroy(c, conn); + if (flow->tcp.events == CLOSED) + tcp_conn_destroy(c, flow); break; case FLOW_TCP_SPLICE: - tcp_splice_timer(c, conn); + tcp_splice_timer(c, flow); break; default: die("Unexpected %s in tcp_timer()", - FLOW_TYPE(&conn->f)); + FLOW_TYPE(&flow->f)); } } diff --git a/tcp.h b/tcp.h index 9eaec3f..4c7b8a4 100644 --- a/tcp.h +++ b/tcp.h @@ -8,9 +8,6 @@ #define TCP_TIMER_INTERVAL 1000 /* ms */ -#define TCP_CONN_INDEX_BITS 17 /* 128k - 1 */ -#define TCP_MAX_CONNS MAX_FROM_BITS(TCP_CONN_INDEX_BITS) - struct ctx; void tcp_timer_handler(struct ctx *c, union epoll_ref ref); @@ -55,7 +52,6 @@ union tcp_listen_epoll_ref { /** * struct tcp_ctx - Execution context for TCP routines * @hash_secret: 128-bit secret for hash functions, ISN and hash table - * @conn_count: Count of total connections in connection table * @port_to_tap: Ports bound host-side, packets to tap or spliced * @fwd_in: Port forwarding configuration for inbound packets * @fwd_out: Port forwarding configuration for outbound packets @@ -65,7 +61,6 @@ union tcp_listen_epoll_ref { */ struct tcp_ctx { uint64_t hash_secret[2]; - int conn_count; struct port_fwd fwd_in; struct port_fwd fwd_out; struct timespec timer_run; diff --git a/tcp_conn.h b/tcp_conn.h index 0074a08..a7c7001 100644 --- a/tcp_conn.h +++ b/tcp_conn.h @@ -40,7 +40,7 @@ struct tcp_tap_conn { struct flow_common f; bool in_epoll :1; - int next_index :TCP_CONN_INDEX_BITS + 2; + int next_index :FLOW_INDEX_BITS + 2; 
#define TCP_RETRANS_BITS 3 unsigned int retrans :TCP_RETRANS_BITS; @@ -159,21 +159,6 @@ struct tcp_splice_conn { uint32_t b_written; }; -/** - * union tcp_conn - Descriptor for a TCP connection (spliced or non-spliced) - * @c: Fields common between all variants - * @tap: Fields specific to non-spliced connections - * @splice: Fields specific to spliced connections -*/ -union tcp_conn { - struct flow_common f; - struct tcp_tap_conn tap; - struct tcp_splice_conn splice; -}; - -/* TCP connections */ -extern union tcp_conn tc[]; - /* Socket pools */ #define TCP_SOCK_POOL_SIZE 32 @@ -181,9 +166,9 @@ extern int init_sock_pool4 [TCP_SOCK_POOL_SIZE]; extern int init_sock_pool6 [TCP_SOCK_POOL_SIZE]; void tcp_splice_conn_update(struct ctx *c, struct tcp_splice_conn *new); -void tcp_table_compact(struct ctx *c, union tcp_conn *hole); -void tcp_splice_destroy(struct ctx *c, union tcp_conn *conn_union); -void tcp_splice_timer(struct ctx *c, union tcp_conn *conn_union); +void tcp_table_compact(struct ctx *c, union flow *hole); +void tcp_splice_destroy(struct ctx *c, union flow *flow); +void tcp_splice_timer(struct ctx *c, union flow *flow); int tcp_conn_pool_sock(int pool[]); int tcp_conn_new_sock(const struct ctx *c, sa_family_t af); void tcp_sock_refill_pool(const struct ctx *c, int pool[], int af); diff --git a/tcp_splice.c b/tcp_splice.c index 840d639..72346b8 100644 --- a/tcp_splice.c +++ b/tcp_splice.c @@ -56,6 +56,7 @@ #include "flow.h" #include "tcp_conn.h" +#include "flow_table.h" #define MAX_PIPE_SIZE (8UL * 1024 * 1024) #define TCP_SPLICE_PIPE_POOL_SIZE 16 @@ -75,7 +76,7 @@ static int splice_pipe_pool [TCP_SPLICE_PIPE_POOL_SIZE][2][2]; #define CONN_V4(x) (!CONN_V6(x)) #define CONN_HAS(conn, set) ((conn->events & (set)) == (set)) #define CONN(index) (&tc[(index)].splice) -#define CONN_IDX(conn) ((union tcp_conn *)(conn) - tc) +#define CONN_IDX(conn) ((union flow *)(conn) - flowtab) /* Display strings for connection events */ static const char *tcp_splice_event_str[] 
__attribute((__unused__)) = { @@ -263,11 +264,11 @@ void tcp_splice_conn_update(struct ctx *c, struct tcp_splice_conn *new) /** * tcp_splice_destroy() - Close spliced connection and pipes, clear * @c: Execution context - * @conn_union: Spliced connection (container union) + * @flow: Flow table entry */ -void tcp_splice_destroy(struct ctx *c, union tcp_conn *conn_union) +void tcp_splice_destroy(struct ctx *c, union flow *flow) { - struct tcp_splice_conn *conn = &conn_union->splice; + struct tcp_splice_conn *conn = &flow->tcp_splice; if (conn->events & SPLICE_ESTABLISHED) { /* Flushing might need to block: don't recycle them. */ @@ -296,7 +297,7 @@ void tcp_splice_destroy(struct ctx *c, union tcp_conn *conn_union) conn->flags = 0; debug("TCP (spliced): index %li, CLOSED", CONN_IDX(conn)); - tcp_table_compact(c, conn_union); + tcp_table_compact(c, flow); } /** @@ -835,14 +836,14 @@ void tcp_splice_init(struct ctx *c) /** * tcp_splice_timer() - Timer for spliced connections * @c: Execution context - * @conn_union: Spliced connection (container union) + * @flow: Flow table entry */ -void tcp_splice_timer(struct ctx *c, union tcp_conn *conn_union) +void tcp_splice_timer(struct ctx *c, union flow *flow) { - struct tcp_splice_conn *conn = &conn_union->splice; + struct tcp_splice_conn *conn = &flow->tcp_splice; if (conn->flags & CLOSING) { - tcp_splice_destroy(c, conn_union); + tcp_splice_destroy(c, flow); return; } -- 2.41.0