* [PATCH v2 01/11] flow, tcp: Generalise connection types
2023-11-26 23:33 [PATCH v2 00/11] Introduce unified flow table, first steps David Gibson
@ 2023-11-26 23:33 ` David Gibson
2023-11-26 23:33 ` [PATCH v2 02/11] flow, tcp: Move TCP connection table to unified flow table David Gibson
` (9 subsequent siblings)
10 siblings, 0 replies; 18+ messages in thread
From: David Gibson @ 2023-11-26 23:33 UTC (permalink / raw)
To: Stefano Brivio, passt-dev; +Cc: David Gibson
Currently TCP connections use a 1-bit selector, 'spliced', to determine the
rest of the contents of the structure. We want to generalise the TCP
connection table to other types of flows in other protocols. Make a start
on this by replacing the tcp_conn_common structure with a new flow_common
structure with an enum rather than a simple boolean indicating the type of
flow.
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
---
Makefile | 8 +++----
flow.c | 16 +++++++++++++
flow.h | 36 ++++++++++++++++++++++++++++++
tcp.c | 63 +++++++++++++++++++++++++++++++++++++---------------
tcp_conn.h | 24 ++++++--------------
tcp_splice.c | 3 ++-
6 files changed, 110 insertions(+), 40 deletions(-)
create mode 100644 flow.c
create mode 100644 flow.h
diff --git a/Makefile b/Makefile
index 6c53695..e2970e3 100644
--- a/Makefile
+++ b/Makefile
@@ -44,15 +44,15 @@ FLAGS += -DARCH=\"$(TARGET_ARCH)\"
FLAGS += -DVERSION=\"$(VERSION)\"
FLAGS += -DDUAL_STACK_SOCKETS=$(DUAL_STACK_SOCKETS)
-PASST_SRCS = arch.c arp.c checksum.c conf.c dhcp.c dhcpv6.c icmp.c igmp.c \
- isolation.c lineread.c log.c mld.c ndp.c netlink.c packet.c passt.c \
- pasta.c pcap.c port_fwd.c tap.c tcp.c tcp_splice.c udp.c util.c
+PASST_SRCS = arch.c arp.c checksum.c conf.c dhcp.c dhcpv6.c flow.c icmp.c \
+ igmp.c isolation.c lineread.c log.c mld.c ndp.c netlink.c packet.c \
+ passt.c pasta.c pcap.c port_fwd.c tap.c tcp.c tcp_splice.c udp.c util.c
QRAP_SRCS = qrap.c
SRCS = $(PASST_SRCS) $(QRAP_SRCS)
MANPAGES = passt.1 pasta.1 qrap.1
-PASST_HEADERS = arch.h arp.h checksum.h conf.h dhcp.h dhcpv6.h icmp.h \
+PASST_HEADERS = arch.h arp.h checksum.h conf.h dhcp.h dhcpv6.h flow.h icmp.h \
inany.h isolation.h lineread.h log.h ndp.h netlink.h packet.h passt.h \
pasta.h pcap.h pif.h port_fwd.h siphash.h tap.h tcp.h tcp_conn.h \
tcp_splice.h udp.h util.h
diff --git a/flow.c b/flow.c
new file mode 100644
index 0000000..7c3603c
--- /dev/null
+++ b/flow.c
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later
+ * Copyright Red Hat
+ * Author: David Gibson <david@gibson.dropbear.id.au>
+ *
+ * Tracking for logical "flows" of packets.
+ */
+
+#include <stdint.h>
+
+#include "flow.h"
+
+const char *flow_type_str[] = {
+ [FLOW_TYPE_NONE] = "<none>",
+ [FLOW_TCP] = "TCP connection",
+ [FLOW_TCP_SPLICE] = "TCP connection (spliced)",
+};
diff --git a/flow.h b/flow.h
new file mode 100644
index 0000000..88e6f0b
--- /dev/null
+++ b/flow.h
@@ -0,0 +1,36 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later
+ * Copyright Red Hat
+ * Author: David Gibson <david@gibson.dropbear.id.au>
+ *
+ * Tracking for logical "flows" of packets.
+ */
+#ifndef FLOW_H
+#define FLOW_H
+
+/**
+ * enum flow_type - Different types of packet flows we track
+ */
+enum flow_type {
+ /* Represents an invalid or unused flow */
+ FLOW_TYPE_NONE = 0,
+ /* A TCP connection between a socket and tap interface */
+ FLOW_TCP,
+ /* A TCP connection between a host socket and ns socket */
+ FLOW_TCP_SPLICE,
+
+ FLOW_TYPE_MAX = FLOW_TCP_SPLICE,
+};
+
+extern const char *flow_type_str[];
+#define FLOW_TYPE(f) \
+ ((f)->type <= FLOW_TYPE_MAX ? flow_type_str[(f)->type] : "?")
+
+/**
+ * struct flow_common - Common fields for packet flows
+ * @type: Type of packet flow
+ */
+struct flow_common {
+ uint8_t type;
+};
+
+#endif /* FLOW_H */
diff --git a/tcp.c b/tcp.c
index 40e3dec..1c25032 100644
--- a/tcp.c
+++ b/tcp.c
@@ -299,6 +299,7 @@
#include "tcp_splice.h"
#include "log.h"
#include "inany.h"
+#include "flow.h"
#include "tcp_conn.h"
@@ -584,7 +585,7 @@ static inline struct tcp_tap_conn *conn_at_idx(int idx)
{
if ((idx < 0) || (idx >= TCP_MAX_CONNS))
return NULL;
- ASSERT(!(CONN(idx)->c.spliced));
+ ASSERT(CONN(idx)->f.type == FLOW_TCP);
return CONN(idx);
}
@@ -1319,14 +1320,21 @@ void tcp_table_compact(struct ctx *c, union tcp_conn *hole)
from = tc + c->tcp.conn_count;
memcpy(hole, from, sizeof(*hole));
- if (from->c.spliced)
- tcp_splice_conn_update(c, &hole->splice);
- else
+ switch (from->f.type) {
+ case FLOW_TCP:
tcp_tap_conn_update(c, &from->tap, &hole->tap);
+ break;
+ case FLOW_TCP_SPLICE:
+ tcp_splice_conn_update(c, &hole->splice);
+ break;
+ default:
+ die("Unexpected %s in tcp_table_compact()",
+ FLOW_TYPE(&from->f));
+ }
- debug("TCP: table compaction (spliced=%d): old index %li, new index %li, "
+ debug("TCP: table compaction (%s): old index %li, new index %li, "
"from: %p, to: %p",
- from->c.spliced, CONN_IDX(from), CONN_IDX(hole),
+ FLOW_TYPE(&from->f), CONN_IDX(from), CONN_IDX(hole),
(void *)from, (void *)hole);
memset(from, 0, sizeof(*from));
@@ -1402,12 +1410,18 @@ void tcp_defer_handler(struct ctx *c)
tcp_l2_data_buf_flush(c);
for (conn = tc + c->tcp.conn_count - 1; conn >= tc; conn--) {
- if (conn->c.spliced) {
- if (conn->splice.flags & CLOSING)
- tcp_splice_destroy(c, conn);
- } else {
+ switch (conn->f.type) {
+ case FLOW_TCP:
if (conn->tap.events == CLOSED)
tcp_conn_destroy(c, conn);
+ break;
+ case FLOW_TCP_SPLICE:
+ if (conn->splice.flags & CLOSING)
+ tcp_splice_destroy(c, conn);
+ break;
+ default:
+ die("Unexpected %s in tcp_defer_handler()",
+ FLOW_TYPE(&conn->f));
}
}
}
@@ -2017,7 +2031,7 @@ static void tcp_conn_from_tap(struct ctx *c,
}
conn = CONN(c->tcp.conn_count++);
- conn->c.spliced = false;
+ conn->f.type = FLOW_TCP;
conn->sock = s;
conn->timer = -1;
conn_event(c, conn, TAP_SYN_RCVD);
@@ -2726,7 +2740,7 @@ static void tcp_tap_conn_from_sock(struct ctx *c,
const struct sockaddr *sa,
const struct timespec *now)
{
- conn->c.spliced = false;
+ conn->f.type = FLOW_TCP;
conn->sock = s;
conn->timer = -1;
conn->ws_to_tap = conn->ws_from_tap = 0;
@@ -2909,10 +2923,17 @@ void tcp_sock_handler(struct ctx *c, union epoll_ref ref, uint32_t events)
{
union tcp_conn *conn = tc + ref.tcp.index;
- if (conn->c.spliced)
- tcp_splice_sock_handler(c, &conn->splice, ref.fd, events);
- else
+ switch (conn->f.type) {
+ case FLOW_TCP:
tcp_tap_sock_handler(c, &conn->tap, events);
+ break;
+ case FLOW_TCP_SPLICE:
+ tcp_splice_sock_handler(c, &conn->splice, ref.fd, events);
+ break;
+ default:
+ die("Unexpected %s in tcp_sock_handler_compact()",
+ FLOW_TYPE(&conn->f));
+ }
}
/**
@@ -3244,11 +3265,17 @@ void tcp_timer(struct ctx *c, const struct timespec *ts)
}
for (conn = tc + c->tcp.conn_count - 1; conn >= tc; conn--) {
- if (conn->c.spliced) {
- tcp_splice_timer(c, conn);
- } else {
+ switch (conn->f.type) {
+ case FLOW_TCP:
if (conn->tap.events == CLOSED)
tcp_conn_destroy(c, conn);
+ break;
+ case FLOW_TCP_SPLICE:
+ tcp_splice_timer(c, conn);
+ break;
+ default:
+ die("Unexpected %s in tcp_timer()",
+ FLOW_TYPE(&conn->f));
}
}
diff --git a/tcp_conn.h b/tcp_conn.h
index 0c6a35b..136f8da 100644
--- a/tcp_conn.h
+++ b/tcp_conn.h
@@ -9,19 +9,9 @@
#ifndef TCP_CONN_H
#define TCP_CONN_H
-/**
- * struct tcp_conn_common - Common fields for spliced and non-spliced
- * @spliced: Is this a spliced connection?
- */
-struct tcp_conn_common {
- bool spliced :1;
-};
-
-extern const char *tcp_common_flag_str[];
-
/**
* struct tcp_tap_conn - Descriptor for a TCP connection (not spliced)
- * @c: Fields common with tcp_splice_conn
+ * @f: Generic flow information
* @in_epoll: Is the connection in the epoll set?
* @next_index: Connection index of next item in hash chain, -1 for none
* @tap_mss: MSS advertised by tap/guest, rounded to 2 ^ TCP_MSS_BITS
@@ -46,8 +36,8 @@ extern const char *tcp_common_flag_str[];
* @seq_init_from_tap: Initial sequence number from tap
*/
struct tcp_tap_conn {
- /* Must be first element to match tcp_splice_conn */
- struct tcp_conn_common c;
+ /* Must be first element */
+ struct flow_common f;
bool in_epoll :1;
int next_index :TCP_CONN_INDEX_BITS + 2;
@@ -121,7 +111,7 @@ struct tcp_tap_conn {
#define SIDES 2
/**
* struct tcp_splice_conn - Descriptor for a spliced TCP connection
- * @c: Fields common with tcp_tap_conn
+ * @f: Generic flow information
* @in_epoll: Is the connection in the epoll set?
* @s: File descriptor for sockets
* @pipe: File descriptors for pipes
@@ -131,8 +121,8 @@ struct tcp_tap_conn {
* @written: Bytes written (not fully written from one other side read)
*/
struct tcp_splice_conn {
- /* Must be first element to match tcp_tap_conn */
- struct tcp_conn_common c;
+ /* Must be first element */
+ struct flow_common f;
bool in_epoll :1;
int s[SIDES];
@@ -168,7 +158,7 @@ struct tcp_splice_conn {
* @splice: Fields specific to spliced connections
*/
union tcp_conn {
- struct tcp_conn_common c;
+ struct flow_common f;
struct tcp_tap_conn tap;
struct tcp_splice_conn splice;
};
diff --git a/tcp_splice.c b/tcp_splice.c
index a5c1332..bfd2bd1 100644
--- a/tcp_splice.c
+++ b/tcp_splice.c
@@ -54,6 +54,7 @@
#include "tcp_splice.h"
#include "siphash.h"
#include "inany.h"
+#include "flow.h"
#include "tcp_conn.h"
@@ -476,7 +477,7 @@ bool tcp_splice_conn_from_sock(const struct ctx *c,
if (setsockopt(s, SOL_TCP, TCP_QUICKACK, &((int){ 1 }), sizeof(int)))
trace("TCP (spliced): failed to set TCP_QUICKACK on %i", s);
- conn->c.spliced = true;
+ conn->f.type = FLOW_TCP_SPLICE;
conn->s[0] = s;
if (tcp_splice_new(c, conn, ref.port, ref.pif))
--
@@ -54,6 +54,7 @@
#include "tcp_splice.h"
#include "siphash.h"
#include "inany.h"
+#include "flow.h"
#include "tcp_conn.h"
@@ -476,7 +477,7 @@ bool tcp_splice_conn_from_sock(const struct ctx *c,
if (setsockopt(s, SOL_TCP, TCP_QUICKACK, &((int){ 1 }), sizeof(int)))
trace("TCP (spliced): failed to set TCP_QUICKACK on %i", s);
- conn->c.spliced = true;
+ conn->f.type = FLOW_TCP_SPLICE;
conn->s[0] = s;
if (tcp_splice_new(c, conn, ref.port, ref.pif))
--
2.43.0
^ permalink raw reply related [flat|nested] 18+ messages in thread
* [PATCH v2 02/11] flow, tcp: Move TCP connection table to unified flow table
2023-11-26 23:33 [PATCH v2 00/11] Introduce unified flow table, first steps David Gibson
2023-11-26 23:33 ` [PATCH v2 01/11] flow, tcp: Generalise connection types David Gibson
@ 2023-11-26 23:33 ` David Gibson
2023-11-26 23:33 ` [PATCH v2 03/11] flow, tcp: Consolidate flow pointer<->index helpers David Gibson
` (8 subsequent siblings)
10 siblings, 0 replies; 18+ messages in thread
From: David Gibson @ 2023-11-26 23:33 UTC (permalink / raw)
To: Stefano Brivio, passt-dev; +Cc: David Gibson
We want to generalise "connection" tracking to things other than true TCP
connections. Continue implenenting this by renaming the TCP connection
table to the "flow table" and moving it to flow.c. The definitions are
split between flow.h and flow_table.h - we need this separation to avoid
circular dependencies: the definitions in flow.h will be needed by many
headers using the flow mechanism, but flow_table.h needs all those protocol
specific headers in order to define the full flow table entry.
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
---
Makefile | 8 ++---
flow.c | 11 +++++++
flow.h | 8 +++++
flow_table.h | 25 +++++++++++++++
passt.h | 3 ++
tcp.c | 87 +++++++++++++++++++++++++---------------------------
tcp.h | 5 ---
tcp_conn.h | 23 +++-----------
tcp_splice.c | 19 ++++++------
9 files changed, 107 insertions(+), 82 deletions(-)
create mode 100644 flow_table.h
diff --git a/Makefile b/Makefile
index e2970e3..d14b029 100644
--- a/Makefile
+++ b/Makefile
@@ -52,10 +52,10 @@ SRCS = $(PASST_SRCS) $(QRAP_SRCS)
MANPAGES = passt.1 pasta.1 qrap.1
-PASST_HEADERS = arch.h arp.h checksum.h conf.h dhcp.h dhcpv6.h flow.h icmp.h \
- inany.h isolation.h lineread.h log.h ndp.h netlink.h packet.h passt.h \
- pasta.h pcap.h pif.h port_fwd.h siphash.h tap.h tcp.h tcp_conn.h \
- tcp_splice.h udp.h util.h
+PASST_HEADERS = arch.h arp.h checksum.h conf.h dhcp.h dhcpv6.h flow.h \
+ flow_table.h icmp.h inany.h isolation.h lineread.h log.h ndp.h \
+ netlink.h packet.h passt.h pasta.h pcap.h pif.h port_fwd.h siphash.h \
+ tap.h tcp.h tcp_conn.h tcp_splice.h udp.h util.h
HEADERS = $(PASST_HEADERS) seccomp.h
C := \#include <linux/tcp.h>\nstruct tcp_info x = { .tcpi_snd_wnd = 0 };
diff --git a/flow.c b/flow.c
index 7c3603c..a9bb8f5 100644
--- a/flow.c
+++ b/flow.c
@@ -6,11 +6,22 @@
*/
#include <stdint.h>
+#include <unistd.h>
+#include <string.h>
+#include "util.h"
+#include "passt.h"
+#include "siphash.h"
+#include "inany.h"
#include "flow.h"
+#include "tcp_conn.h"
+#include "flow_table.h"
const char *flow_type_str[] = {
[FLOW_TYPE_NONE] = "<none>",
[FLOW_TCP] = "TCP connection",
[FLOW_TCP_SPLICE] = "TCP connection (spliced)",
};
+
+/* Global Flow Table */
+union flow flowtab[FLOW_MAX];
diff --git a/flow.h b/flow.h
index 88e6f0b..57289a8 100644
--- a/flow.h
+++ b/flow.h
@@ -33,4 +33,12 @@ struct flow_common {
uint8_t type;
};
+#define FLOW_INDEX_BITS 17 /* 128k - 1 */
+#define FLOW_MAX MAX_FROM_BITS(FLOW_INDEX_BITS)
+
+#define FLOW_TABLE_PRESSURE 30 /* % of FLOW_MAX */
+#define FLOW_FILE_PRESSURE 30 /* % of c->nofile */
+
+union flow;
+
#endif /* FLOW_H */
diff --git a/flow_table.h b/flow_table.h
new file mode 100644
index 0000000..c4c646b
--- /dev/null
+++ b/flow_table.h
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later
+ * Copyright Red Hat
+ * Author: David Gibson <david@gibson.dropbear.id.au>
+ *
+ * Definitions for the global table of packet flows.
+ */
+#ifndef FLOW_TABLE_H
+#define FLOW_TABLE_H
+
+/**
+ * union flow - Descriptor for a logical packet flow (e.g. connection)
+ * @f: Fields common between all variants
+ * @tcp: Fields for non-spliced TCP connections
+ * @tcp_splice: Fields for spliced TCP connections
+*/
+union flow {
+ struct flow_common f;
+ struct tcp_tap_conn tcp;
+ struct tcp_splice_conn tcp_splice;
+};
+
+/* Global Flow Table */
+extern union flow flowtab[];
+
+#endif /* FLOW_TABLE_H */
diff --git a/passt.h b/passt.h
index cac720a..3f5dfb9 100644
--- a/passt.h
+++ b/passt.h
@@ -219,6 +219,7 @@ struct ip6_ctx {
* @pasta_conf_ns: Configure namespace after creating it
* @no_copy_routes: Don't copy all routes when configuring target namespace
* @no_copy_addrs: Don't copy all addresses when configuring namespace
+ * @flow_count: Number of tracked packet flows (connections etc.)
* @no_tcp: Disable TCP operation
* @tcp: Context for TCP protocol handler
* @no_tcp: Disable UDP operation
@@ -277,6 +278,8 @@ struct ctx {
int no_copy_routes;
int no_copy_addrs;
+ unsigned flow_count;
+
int no_tcp;
struct tcp_ctx tcp;
int no_udp;
diff --git a/tcp.c b/tcp.c
index 1c25032..0119bd3 100644
--- a/tcp.c
+++ b/tcp.c
@@ -302,14 +302,14 @@
#include "flow.h"
#include "tcp_conn.h"
+#include "flow_table.h"
#define TCP_FRAMES_MEM 128
#define TCP_FRAMES \
(c->mode == MODE_PASST ? TCP_FRAMES_MEM : 1)
#define TCP_HASH_TABLE_LOAD 70 /* % */
-#define TCP_HASH_TABLE_SIZE (TCP_MAX_CONNS * 100 / \
- TCP_HASH_TABLE_LOAD)
+#define TCP_HASH_TABLE_SIZE (FLOW_MAX * 100 / TCP_HASH_TABLE_LOAD)
#define MAX_WS 8
#define MAX_WINDOW (1 << (16 + (MAX_WS)))
@@ -570,11 +570,8 @@ tcp6_l2_flags_buf[TCP_FRAMES_MEM];
static unsigned int tcp6_l2_flags_buf_used;
-/* TCP connections */
-union tcp_conn tc[TCP_MAX_CONNS];
-
-#define CONN(idx) (&tc[(idx)].tap)
-#define CONN_IDX(conn) ((union tcp_conn *)(conn) - tc)
+#define CONN(idx) (&flowtab[(idx)].tcp)
+#define CONN_IDX(conn) ((union flow *)(conn) - flowtab)
/** conn_at_idx() - Find a connection by index, if present
* @idx: Index of connection to lookup
@@ -583,7 +580,7 @@ union tcp_conn tc[TCP_MAX_CONNS];
*/
static inline struct tcp_tap_conn *conn_at_idx(int idx)
{
- if ((idx < 0) || (idx >= TCP_MAX_CONNS))
+ if ((idx < 0) || (idx >= FLOW_MAX))
return NULL;
ASSERT(CONN(idx)->f.type == FLOW_TCP);
return CONN(idx);
@@ -1306,26 +1303,26 @@ static struct tcp_tap_conn *tcp_hash_lookup(const struct ctx *c,
* @c: Execution context
* @hole: Pointer to recently closed connection
*/
-void tcp_table_compact(struct ctx *c, union tcp_conn *hole)
+void tcp_table_compact(struct ctx *c, union flow *hole)
{
- union tcp_conn *from;
+ union flow *from;
- if (CONN_IDX(hole) == --c->tcp.conn_count) {
+ if (CONN_IDX(hole) == --c->flow_count) {
debug("TCP: table compaction: maximum index was %li (%p)",
CONN_IDX(hole), (void *)hole);
memset(hole, 0, sizeof(*hole));
return;
}
- from = tc + c->tcp.conn_count;
+ from = flowtab + c->flow_count;
memcpy(hole, from, sizeof(*hole));
switch (from->f.type) {
case FLOW_TCP:
- tcp_tap_conn_update(c, &from->tap, &hole->tap);
+ tcp_tap_conn_update(c, &from->tcp, &hole->tcp);
break;
case FLOW_TCP_SPLICE:
- tcp_splice_conn_update(c, &hole->splice);
+ tcp_splice_conn_update(c, &hole->tcp_splice);
break;
default:
die("Unexpected %s in tcp_table_compact()",
@@ -1343,18 +1340,18 @@ void tcp_table_compact(struct ctx *c, union tcp_conn *hole)
/**
* tcp_conn_destroy() - Close sockets, trigger hash table removal and compaction
* @c: Execution context
- * @conn_union: Connection pointer (container union)
+ * @flow: Flow table entry for this connection
*/
-static void tcp_conn_destroy(struct ctx *c, union tcp_conn *conn_union)
+static void tcp_conn_destroy(struct ctx *c, union flow *flow)
{
- const struct tcp_tap_conn *conn = &conn_union->tap;
+ const struct tcp_tap_conn *conn = &flow->tcp;
close(conn->sock);
if (conn->timer != -1)
close(conn->timer);
tcp_hash_remove(c, conn);
- tcp_table_compact(c, conn_union);
+ tcp_table_compact(c, flow);
}
static void tcp_rst_do(struct ctx *c, struct tcp_tap_conn *conn);
@@ -1404,24 +1401,24 @@ static void tcp_l2_data_buf_flush(const struct ctx *c)
*/
void tcp_defer_handler(struct ctx *c)
{
- union tcp_conn *conn;
+ union flow *flow;
tcp_l2_flags_buf_flush(c);
tcp_l2_data_buf_flush(c);
- for (conn = tc + c->tcp.conn_count - 1; conn >= tc; conn--) {
- switch (conn->f.type) {
+ for (flow = flowtab + c->flow_count - 1; flow >= flowtab; flow--) {
+ switch (flow->f.type) {
case FLOW_TCP:
- if (conn->tap.events == CLOSED)
- tcp_conn_destroy(c, conn);
+ if (flow->tcp.events == CLOSED)
+ tcp_conn_destroy(c, flow);
break;
case FLOW_TCP_SPLICE:
- if (conn->splice.flags & CLOSING)
- tcp_splice_destroy(c, conn);
+ if (flow->tcp_splice.flags & CLOSING)
+ tcp_splice_destroy(c, flow);
break;
default:
die("Unexpected %s in tcp_defer_handler()",
- FLOW_TYPE(&conn->f));
+ FLOW_TYPE(&flow->f));
}
}
}
@@ -2004,7 +2001,7 @@ static void tcp_conn_from_tap(struct ctx *c,
(void)saddr;
- if (c->tcp.conn_count >= TCP_MAX_CONNS)
+ if (c->flow_count >= FLOW_MAX)
return;
if ((s = tcp_conn_pool_sock(pool)) < 0)
@@ -2030,7 +2027,7 @@ static void tcp_conn_from_tap(struct ctx *c,
}
}
- conn = CONN(c->tcp.conn_count++);
+ conn = CONN(c->flow_count++);
conn->f.type = FLOW_TCP;
conn->sock = s;
conn->timer = -1;
@@ -2775,24 +2772,24 @@ void tcp_listen_handler(struct ctx *c, union epoll_ref ref,
{
struct sockaddr_storage sa;
socklen_t sl = sizeof(sa);
- union tcp_conn *conn;
+ union flow *flow;
int s;
- if (c->no_tcp || c->tcp.conn_count >= TCP_MAX_CONNS)
+ if (c->no_tcp || c->flow_count >= FLOW_MAX)
return;
s = accept4(ref.fd, (struct sockaddr *)&sa, &sl, SOCK_NONBLOCK);
if (s < 0)
return;
- conn = tc + c->tcp.conn_count++;
+ flow = flowtab + c->flow_count++;
if (c->mode == MODE_PASTA &&
- tcp_splice_conn_from_sock(c, ref.tcp_listen, &conn->splice,
+ tcp_splice_conn_from_sock(c, ref.tcp_listen, &flow->tcp_splice,
s, (struct sockaddr *)&sa))
return;
- tcp_tap_conn_from_sock(c, ref.tcp_listen, &conn->tap, s,
+ tcp_tap_conn_from_sock(c, ref.tcp_listen, &flow->tcp, s,
(struct sockaddr *)&sa, now);
}
@@ -2921,18 +2918,18 @@ static void tcp_tap_sock_handler(struct ctx *c, struct tcp_tap_conn *conn,
*/
void tcp_sock_handler(struct ctx *c, union epoll_ref ref, uint32_t events)
{
- union tcp_conn *conn = tc + ref.tcp.index;
+ union flow *flow = flowtab + ref.tcp.index;
- switch (conn->f.type) {
+ switch (flow->f.type) {
case FLOW_TCP:
- tcp_tap_sock_handler(c, &conn->tap, events);
+ tcp_tap_sock_handler(c, &flow->tcp, events);
break;
case FLOW_TCP_SPLICE:
- tcp_splice_sock_handler(c, &conn->splice, ref.fd, events);
+ tcp_splice_sock_handler(c, &flow->tcp_splice, ref.fd, events);
break;
default:
die("Unexpected %s in tcp_sock_handler_compact()",
- FLOW_TYPE(&conn->f));
+ FLOW_TYPE(&flow->f));
}
}
@@ -3248,7 +3245,7 @@ static int tcp_port_rebind_outbound(void *arg)
*/
void tcp_timer(struct ctx *c, const struct timespec *ts)
{
- union tcp_conn *conn;
+ union flow *flow;
(void)ts;
@@ -3264,18 +3261,18 @@ void tcp_timer(struct ctx *c, const struct timespec *ts)
}
}
- for (conn = tc + c->tcp.conn_count - 1; conn >= tc; conn--) {
- switch (conn->f.type) {
+ for (flow = flowtab + c->flow_count - 1; flow >= flowtab; flow--) {
+ switch (flow->f.type) {
case FLOW_TCP:
- if (conn->tap.events == CLOSED)
- tcp_conn_destroy(c, conn);
+ if (flow->tcp.events == CLOSED)
+ tcp_conn_destroy(c, flow);
break;
case FLOW_TCP_SPLICE:
- tcp_splice_timer(c, conn);
+ tcp_splice_timer(c, flow);
break;
default:
die("Unexpected %s in tcp_timer()",
- FLOW_TYPE(&conn->f));
+ FLOW_TYPE(&flow->f));
}
}
diff --git a/tcp.h b/tcp.h
index 06965d8..c8b738d 100644
--- a/tcp.h
+++ b/tcp.h
@@ -8,9 +8,6 @@
#define TCP_TIMER_INTERVAL 1000 /* ms */
-#define TCP_CONN_INDEX_BITS 17 /* 128k - 1 */
-#define TCP_MAX_CONNS MAX_FROM_BITS(TCP_CONN_INDEX_BITS)
-
struct ctx;
void tcp_timer_handler(struct ctx *c, union epoll_ref ref);
@@ -56,7 +53,6 @@ union tcp_listen_epoll_ref {
/**
* struct tcp_ctx - Execution context for TCP routines
* @hash_secret: 128-bit secret for hash functions, ISN and hash table
- * @conn_count: Count of total connections in connection table
* @port_to_tap: Ports bound host-side, packets to tap or spliced
* @fwd_in: Port forwarding configuration for inbound packets
* @fwd_out: Port forwarding configuration for outbound packets
@@ -66,7 +62,6 @@ union tcp_listen_epoll_ref {
*/
struct tcp_ctx {
uint64_t hash_secret[2];
- int conn_count;
struct port_fwd fwd_in;
struct port_fwd fwd_out;
struct timespec timer_run;
diff --git a/tcp_conn.h b/tcp_conn.h
index 136f8da..5a107fc 100644
--- a/tcp_conn.h
+++ b/tcp_conn.h
@@ -40,7 +40,7 @@ struct tcp_tap_conn {
struct flow_common f;
bool in_epoll :1;
- int next_index :TCP_CONN_INDEX_BITS + 2;
+ int next_index :FLOW_INDEX_BITS + 2;
#define TCP_RETRANS_BITS 3
unsigned int retrans :TCP_RETRANS_BITS;
@@ -151,21 +151,6 @@ struct tcp_splice_conn {
uint32_t written[SIDES];
};
-/**
- * union tcp_conn - Descriptor for a TCP connection (spliced or non-spliced)
- * @c: Fields common between all variants
- * @tap: Fields specific to non-spliced connections
- * @splice: Fields specific to spliced connections
-*/
-union tcp_conn {
- struct flow_common f;
- struct tcp_tap_conn tap;
- struct tcp_splice_conn splice;
-};
-
-/* TCP connections */
-extern union tcp_conn tc[];
-
/* Socket pools */
#define TCP_SOCK_POOL_SIZE 32
@@ -173,9 +158,9 @@ extern int init_sock_pool4 [TCP_SOCK_POOL_SIZE];
extern int init_sock_pool6 [TCP_SOCK_POOL_SIZE];
void tcp_splice_conn_update(const struct ctx *c, struct tcp_splice_conn *new);
-void tcp_table_compact(struct ctx *c, union tcp_conn *hole);
-void tcp_splice_destroy(struct ctx *c, union tcp_conn *conn_union);
-void tcp_splice_timer(struct ctx *c, union tcp_conn *conn_union);
+void tcp_table_compact(struct ctx *c, union flow *hole);
+void tcp_splice_destroy(struct ctx *c, union flow *flow);
+void tcp_splice_timer(struct ctx *c, union flow *flow);
int tcp_conn_pool_sock(int pool[]);
int tcp_conn_new_sock(const struct ctx *c, sa_family_t af);
void tcp_sock_refill_pool(const struct ctx *c, int pool[], int af);
diff --git a/tcp_splice.c b/tcp_splice.c
index bfd2bd1..9f4831a 100644
--- a/tcp_splice.c
+++ b/tcp_splice.c
@@ -57,6 +57,7 @@
#include "flow.h"
#include "tcp_conn.h"
+#include "flow_table.h"
#define MAX_PIPE_SIZE (8UL * 1024 * 1024)
#define TCP_SPLICE_PIPE_POOL_SIZE 32
@@ -76,7 +77,7 @@ static int splice_pipe_pool [TCP_SPLICE_PIPE_POOL_SIZE][2];
#define CONN_V4(x) (!CONN_V6(x))
#define CONN_HAS(conn, set) ((conn->events & (set)) == (set))
#define CONN(idx) (&tc[(idx)].splice)
-#define CONN_IDX(conn) ((union tcp_conn *)(conn) - tc)
+#define CONN_IDX(conn) ((union flow *)(conn) - flowtab)
/* Display strings for connection events */
static const char *tcp_splice_event_str[] __attribute((__unused__)) = {
@@ -254,11 +255,11 @@ void tcp_splice_conn_update(const struct ctx *c, struct tcp_splice_conn *new)
/**
* tcp_splice_destroy() - Close spliced connection and pipes, clear
* @c: Execution context
- * @conn_union: Spliced connection (container union)
+ * @flow: Flow table entry
*/
-void tcp_splice_destroy(struct ctx *c, union tcp_conn *conn_union)
+void tcp_splice_destroy(struct ctx *c, union flow *flow)
{
- struct tcp_splice_conn *conn = &conn_union->splice;
+ struct tcp_splice_conn *conn = &flow->tcp_splice;
int side;
for (side = 0; side < SIDES; side++) {
@@ -283,7 +284,7 @@ void tcp_splice_destroy(struct ctx *c, union tcp_conn *conn_union)
conn->flags = 0;
debug("TCP (spliced): index %li, CLOSED", CONN_IDX(conn));
- tcp_table_compact(c, conn_union);
+ tcp_table_compact(c, flow);
}
/**
@@ -775,15 +776,15 @@ void tcp_splice_init(struct ctx *c)
/**
* tcp_splice_timer() - Timer for spliced connections
* @c: Execution context
- * @conn_union: Spliced connection (container union)
+ * @flow: Flow table entry
*/
-void tcp_splice_timer(struct ctx *c, union tcp_conn *conn_union)
+void tcp_splice_timer(struct ctx *c, union flow *flow)
{
- struct tcp_splice_conn *conn = &conn_union->splice;
+ struct tcp_splice_conn *conn = &flow->tcp_splice;
int side;
if (conn->flags & CLOSING) {
- tcp_splice_destroy(c, conn_union);
+ tcp_splice_destroy(c, flow);
return;
}
--
@@ -57,6 +57,7 @@
#include "flow.h"
#include "tcp_conn.h"
+#include "flow_table.h"
#define MAX_PIPE_SIZE (8UL * 1024 * 1024)
#define TCP_SPLICE_PIPE_POOL_SIZE 32
@@ -76,7 +77,7 @@ static int splice_pipe_pool [TCP_SPLICE_PIPE_POOL_SIZE][2];
#define CONN_V4(x) (!CONN_V6(x))
#define CONN_HAS(conn, set) ((conn->events & (set)) == (set))
#define CONN(idx) (&tc[(idx)].splice)
-#define CONN_IDX(conn) ((union tcp_conn *)(conn) - tc)
+#define CONN_IDX(conn) ((union flow *)(conn) - flowtab)
/* Display strings for connection events */
static const char *tcp_splice_event_str[] __attribute((__unused__)) = {
@@ -254,11 +255,11 @@ void tcp_splice_conn_update(const struct ctx *c, struct tcp_splice_conn *new)
/**
* tcp_splice_destroy() - Close spliced connection and pipes, clear
* @c: Execution context
- * @conn_union: Spliced connection (container union)
+ * @flow: Flow table entry
*/
-void tcp_splice_destroy(struct ctx *c, union tcp_conn *conn_union)
+void tcp_splice_destroy(struct ctx *c, union flow *flow)
{
- struct tcp_splice_conn *conn = &conn_union->splice;
+ struct tcp_splice_conn *conn = &flow->tcp_splice;
int side;
for (side = 0; side < SIDES; side++) {
@@ -283,7 +284,7 @@ void tcp_splice_destroy(struct ctx *c, union tcp_conn *conn_union)
conn->flags = 0;
debug("TCP (spliced): index %li, CLOSED", CONN_IDX(conn));
- tcp_table_compact(c, conn_union);
+ tcp_table_compact(c, flow);
}
/**
@@ -775,15 +776,15 @@ void tcp_splice_init(struct ctx *c)
/**
* tcp_splice_timer() - Timer for spliced connections
* @c: Execution context
- * @conn_union: Spliced connection (container union)
+ * @flow: Flow table entry
*/
-void tcp_splice_timer(struct ctx *c, union tcp_conn *conn_union)
+void tcp_splice_timer(struct ctx *c, union flow *flow)
{
- struct tcp_splice_conn *conn = &conn_union->splice;
+ struct tcp_splice_conn *conn = &flow->tcp_splice;
int side;
if (conn->flags & CLOSING) {
- tcp_splice_destroy(c, conn_union);
+ tcp_splice_destroy(c, flow);
return;
}
--
2.43.0
^ permalink raw reply related [flat|nested] 18+ messages in thread
* [PATCH v2 03/11] flow, tcp: Consolidate flow pointer<->index helpers
2023-11-26 23:33 [PATCH v2 00/11] Introduce unified flow table, first steps David Gibson
2023-11-26 23:33 ` [PATCH v2 01/11] flow, tcp: Generalise connection types David Gibson
2023-11-26 23:33 ` [PATCH v2 02/11] flow, tcp: Move TCP connection table to unified flow table David Gibson
@ 2023-11-26 23:33 ` David Gibson
2023-11-26 23:33 ` [PATCH v2 04/11] util: MAX_FROM_BITS() should be unsigned David Gibson
` (7 subsequent siblings)
10 siblings, 0 replies; 18+ messages in thread
From: David Gibson @ 2023-11-26 23:33 UTC (permalink / raw)
To: Stefano Brivio, passt-dev; +Cc: David Gibson
Both tcp.c and tcp_splice.c define CONN_IDX() variants to find the index
of their connection structures in the connection table, now become the
unified flow table. We can easily combine these into a common helper.
While we're there, add some trickery for some additional type safety.
They also define their own CONN() versions, which aren't so easily combined
since they need to return different types, but we can have them use a
common helper.
In the process, we standardise on always using an unsigned type to store
the connection / flow index, which makes more sense. tcp.c's conn_at_idx()
remains for now, but we change its parameter to unsigned to match. That in
turn means we can remove a check for negative values from it.
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
---
flow_table.h | 25 ++++++++++++++++++++
tcp.c | 65 ++++++++++++++++++++++++++--------------------------
tcp_conn.h | 2 +-
tcp_splice.c | 21 ++++++++---------
4 files changed, 68 insertions(+), 45 deletions(-)
diff --git a/flow_table.h b/flow_table.h
index c4c646b..5e897bd 100644
--- a/flow_table.h
+++ b/flow_table.h
@@ -22,4 +22,29 @@ union flow {
/* Global Flow Table */
extern union flow flowtab[];
+
+/** flow_idx - Index of flow from common structure
+ * @f: Common flow fields pointer
+ *
+ * Return: index of @f in the flow table
+ */
+static inline unsigned flow_idx(const struct flow_common *f)
+{
+ return (union flow *)f - flowtab;
+}
+
+/** FLOW_IDX - Find the index of a flow
+ * @f_: Flow pointer, either union flow * or protocol specific
+ *
+ * Return: index of @f in the flow table
+ */
+#define FLOW_IDX(f_) (flow_idx(&(f_)->f))
+
+/** FLOW - Flow entry at a given index
+ * @idx: Flow index
+ *
+ * Return: pointer to entry @idx in the flow table
+ */
+#define FLOW(idx) (&flowtab[(idx)])
+
#endif /* FLOW_TABLE_H */
diff --git a/tcp.c b/tcp.c
index 0119bd3..859df6f 100644
--- a/tcp.c
+++ b/tcp.c
@@ -570,17 +570,16 @@ tcp6_l2_flags_buf[TCP_FRAMES_MEM];
static unsigned int tcp6_l2_flags_buf_used;
-#define CONN(idx) (&flowtab[(idx)].tcp)
-#define CONN_IDX(conn) ((union flow *)(conn) - flowtab)
+#define CONN(idx) (&(FLOW(idx)->tcp))
/** conn_at_idx() - Find a connection by index, if present
* @idx: Index of connection to lookup
*
* Return: pointer to connection, or NULL if @idx is out of bounds
*/
-static inline struct tcp_tap_conn *conn_at_idx(int idx)
+static inline struct tcp_tap_conn *conn_at_idx(unsigned idx)
{
- if ((idx < 0) || (idx >= FLOW_MAX))
+ if (idx >= FLOW_MAX)
return NULL;
ASSERT(CONN(idx)->f.type == FLOW_TCP);
return CONN(idx);
@@ -640,7 +639,7 @@ static int tcp_epoll_ctl(const struct ctx *c, struct tcp_tap_conn *conn)
{
int m = conn->in_epoll ? EPOLL_CTL_MOD : EPOLL_CTL_ADD;
union epoll_ref ref = { .type = EPOLL_TYPE_TCP, .fd = conn->sock,
- .tcp.index = CONN_IDX(conn) };
+ .tcp.index = FLOW_IDX(conn) };
struct epoll_event ev = { .data.u64 = ref.u64 };
if (conn->events == CLOSED) {
@@ -661,7 +660,7 @@ static int tcp_epoll_ctl(const struct ctx *c, struct tcp_tap_conn *conn)
if (conn->timer != -1) {
union epoll_ref ref_t = { .type = EPOLL_TYPE_TCP_TIMER,
.fd = conn->sock,
- .tcp.index = CONN_IDX(conn) };
+ .tcp.index = FLOW_IDX(conn) };
struct epoll_event ev_t = { .data.u64 = ref_t.u64,
.events = EPOLLIN | EPOLLET };
@@ -689,7 +688,7 @@ static void tcp_timer_ctl(const struct ctx *c, struct tcp_tap_conn *conn)
if (conn->timer == -1) {
union epoll_ref ref = { .type = EPOLL_TYPE_TCP_TIMER,
.fd = conn->sock,
- .tcp.index = CONN_IDX(conn) };
+ .tcp.index = FLOW_IDX(conn) };
struct epoll_event ev = { .data.u64 = ref.u64,
.events = EPOLLIN | EPOLLET };
int fd;
@@ -725,7 +724,7 @@ static void tcp_timer_ctl(const struct ctx *c, struct tcp_tap_conn *conn)
it.it_value.tv_sec = ACT_TIMEOUT;
}
- debug("TCP: index %li, timer expires in %lu.%03lus", CONN_IDX(conn),
+ debug("TCP: index %u, timer expires in %lu.%03lus", FLOW_IDX(conn),
it.it_value.tv_sec, it.it_value.tv_nsec / 1000 / 1000);
timerfd_settime(conn->timer, 0, &it, NULL);
@@ -748,7 +747,7 @@ static void conn_flag_do(const struct ctx *c, struct tcp_tap_conn *conn,
conn->flags &= flag;
if (flag_index >= 0) {
- debug("TCP: index %li: %s dropped", CONN_IDX(conn),
+ debug("TCP: index %u: %s dropped", FLOW_IDX(conn),
tcp_flag_str[flag_index]);
}
} else {
@@ -769,7 +768,7 @@ static void conn_flag_do(const struct ctx *c, struct tcp_tap_conn *conn,
conn->flags |= flag;
if (flag_index >= 0) {
- debug("TCP: index %li: %s", CONN_IDX(conn),
+ debug("TCP: index %u: %s", FLOW_IDX(conn),
tcp_flag_str[flag_index]);
}
}
@@ -819,12 +818,12 @@ static void conn_event_do(const struct ctx *c, struct tcp_tap_conn *conn,
new += 5;
if (prev != new) {
- debug("TCP: index %li, %s: %s -> %s", CONN_IDX(conn),
+ debug("TCP: index %u, %s: %s -> %s", FLOW_IDX(conn),
num == -1 ? "CLOSED" : tcp_event_str[num],
prev == -1 ? "CLOSED" : tcp_state_str[prev],
(new == -1 || num == -1) ? "CLOSED" : tcp_state_str[new]);
} else {
- debug("TCP: index %li, %s", CONN_IDX(conn),
+ debug("TCP: index %u, %s", FLOW_IDX(conn),
num == -1 ? "CLOSED" : tcp_event_str[num]);
}
@@ -1204,11 +1203,11 @@ static void tcp_hash_insert(const struct ctx *c, struct tcp_tap_conn *conn)
int b;
b = tcp_hash(c, &conn->faddr, conn->eport, conn->fport);
- conn->next_index = tc_hash[b] ? CONN_IDX(tc_hash[b]) : -1;
+ conn->next_index = tc_hash[b] ? FLOW_IDX(tc_hash[b]) : -1U;
tc_hash[b] = conn;
- debug("TCP: hash table insert: index %li, sock %i, bucket: %i, next: "
- "%p", CONN_IDX(conn), conn->sock, b,
+ debug("TCP: hash table insert: index %u, sock %i, bucket: %i, next: "
+ "%p", FLOW_IDX(conn), conn->sock, b,
(void *)conn_at_idx(conn->next_index));
}
@@ -1234,8 +1233,8 @@ static void tcp_hash_remove(const struct ctx *c,
}
}
- debug("TCP: hash table remove: index %li, sock %i, bucket: %i, new: %p",
- CONN_IDX(conn), conn->sock, b,
+ debug("TCP: hash table remove: index %u, sock %i, bucket: %i, new: %p",
+ FLOW_IDX(conn), conn->sock, b,
(void *)(prev ? conn_at_idx(prev->next_index) : tc_hash[b]));
}
@@ -1255,16 +1254,16 @@ static void tcp_tap_conn_update(const struct ctx *c, struct tcp_tap_conn *old,
prev = entry, entry = conn_at_idx(entry->next_index)) {
if (entry == old) {
if (prev)
- prev->next_index = CONN_IDX(new);
+ prev->next_index = FLOW_IDX(new);
else
tc_hash[b] = new;
break;
}
}
- debug("TCP: hash table update: old index %li, new index %li, sock %i, "
+ debug("TCP: hash table update: old index %u, new index %u, sock %i, "
"bucket: %i, old: %p, new: %p",
- CONN_IDX(old), CONN_IDX(new), new->sock, b,
+ FLOW_IDX(old), FLOW_IDX(new), new->sock, b,
(void *)old, (void *)new);
tcp_epoll_ctl(c, new);
@@ -1307,9 +1306,9 @@ void tcp_table_compact(struct ctx *c, union flow *hole)
{
union flow *from;
- if (CONN_IDX(hole) == --c->flow_count) {
- debug("TCP: table compaction: maximum index was %li (%p)",
- CONN_IDX(hole), (void *)hole);
+ if (FLOW_IDX(hole) == --c->flow_count) {
+ debug("TCP: table compaction: maximum index was %u (%p)",
+ FLOW_IDX(hole), (void *)hole);
memset(hole, 0, sizeof(*hole));
return;
}
@@ -1329,9 +1328,9 @@ void tcp_table_compact(struct ctx *c, union flow *hole)
FLOW_TYPE(&from->f));
}
- debug("TCP: table compaction (%s): old index %li, new index %li, "
+ debug("TCP: table compaction (%s): old index %u, new index %u, "
"from: %p, to: %p",
- FLOW_TYPE(&from->f), CONN_IDX(from), CONN_IDX(hole),
+ FLOW_TYPE(&from->f), FLOW_IDX(from), FLOW_IDX(hole),
(void *)from, (void *)hole);
memset(from, 0, sizeof(*from));
@@ -1357,7 +1356,7 @@ static void tcp_conn_destroy(struct ctx *c, union flow *flow)
static void tcp_rst_do(struct ctx *c, struct tcp_tap_conn *conn);
#define tcp_rst(c, conn) \
do { \
- debug("TCP: index %li, reset at %s:%i", CONN_IDX(conn), \
+ debug("TCP: index %u, reset at %s:%i", FLOW_IDX(conn), \
__func__, __LINE__); \
tcp_rst_do(c, conn); \
} while (0)
@@ -2581,7 +2580,7 @@ int tcp_tap_handler(struct ctx *c, uint8_t pif, int af,
return 1;
}
- trace("TCP: packet length %lu from tap for index %lu", len, CONN_IDX(conn));
+ trace("TCP: packet length %lu from tap for index %u", len, FLOW_IDX(conn));
if (th->rst) {
conn_event(c, conn, CLOSED);
@@ -2821,17 +2820,17 @@ void tcp_timer_handler(struct ctx *c, union epoll_ref ref)
tcp_timer_ctl(c, conn);
} else if (conn->flags & ACK_FROM_TAP_DUE) {
if (!(conn->events & ESTABLISHED)) {
- debug("TCP: index %li, handshake timeout", CONN_IDX(conn));
+ debug("TCP: index %u, handshake timeout", FLOW_IDX(conn));
tcp_rst(c, conn);
} else if (CONN_HAS(conn, SOCK_FIN_SENT | TAP_FIN_ACKED)) {
- debug("TCP: index %li, FIN timeout", CONN_IDX(conn));
+ debug("TCP: index %u, FIN timeout", FLOW_IDX(conn));
tcp_rst(c, conn);
} else if (conn->retrans == TCP_MAX_RETRANS) {
- debug("TCP: index %li, retransmissions count exceeded",
- CONN_IDX(conn));
+ debug("TCP: index %u, retransmissions count exceeded",
+ FLOW_IDX(conn));
tcp_rst(c, conn);
} else {
- debug("TCP: index %li, ACK timeout, retry", CONN_IDX(conn));
+ debug("TCP: index %u, ACK timeout, retry", FLOW_IDX(conn));
conn->retrans++;
conn->seq_to_tap = conn->seq_ack_from_tap;
tcp_data_from_sock(c, conn);
@@ -2849,7 +2848,7 @@ void tcp_timer_handler(struct ctx *c, union epoll_ref ref)
*/
timerfd_settime(conn->timer, 0, &new, &old);
if (old.it_value.tv_sec == ACT_TIMEOUT) {
- debug("TCP: index %li, activity timeout", CONN_IDX(conn));
+ debug("TCP: index %u, activity timeout", FLOW_IDX(conn));
tcp_rst(c, conn);
}
}
diff --git a/tcp_conn.h b/tcp_conn.h
index 5a107fc..5a9376e 100644
--- a/tcp_conn.h
+++ b/tcp_conn.h
@@ -40,7 +40,7 @@ struct tcp_tap_conn {
struct flow_common f;
bool in_epoll :1;
- int next_index :FLOW_INDEX_BITS + 2;
+ unsigned next_index :FLOW_INDEX_BITS + 2;
#define TCP_RETRANS_BITS 3
unsigned int retrans :TCP_RETRANS_BITS;
diff --git a/tcp_splice.c b/tcp_splice.c
index 9f4831a..3955417 100644
--- a/tcp_splice.c
+++ b/tcp_splice.c
@@ -76,8 +76,7 @@ static int splice_pipe_pool [TCP_SPLICE_PIPE_POOL_SIZE][2];
#define CONN_V6(x) (x->flags & SPLICE_V6)
#define CONN_V4(x) (!CONN_V6(x))
#define CONN_HAS(conn, set) ((conn->events & (set)) == (set))
-#define CONN(idx) (&tc[(idx)].splice)
-#define CONN_IDX(conn) ((union flow *)(conn) - flowtab)
+#define CONN(idx) (&FLOW(idx)->tcp_splice)
/* Display strings for connection events */
static const char *tcp_splice_event_str[] __attribute((__unused__)) = {
@@ -129,8 +128,8 @@ static int tcp_splice_epoll_ctl(const struct ctx *c,
{
int m = conn->in_epoll ? EPOLL_CTL_MOD : EPOLL_CTL_ADD;
union epoll_ref ref[SIDES] = {
- { .type = EPOLL_TYPE_TCP, .fd = conn->s[0], .tcp.index = CONN_IDX(conn) },
- { .type = EPOLL_TYPE_TCP, .fd = conn->s[1], .tcp.index = CONN_IDX(conn) }
+ { .type = EPOLL_TYPE_TCP, .fd = conn->s[0], .tcp.index = FLOW_IDX(conn) },
+ { .type = EPOLL_TYPE_TCP, .fd = conn->s[1], .tcp.index = FLOW_IDX(conn) }
};
struct epoll_event ev[SIDES] = { { .data.u64 = ref[0].u64 },
{ .data.u64 = ref[1].u64 } };
@@ -140,8 +139,8 @@ static int tcp_splice_epoll_ctl(const struct ctx *c,
if (epoll_ctl(c->epollfd, m, conn->s[0], &ev[0]) ||
epoll_ctl(c->epollfd, m, conn->s[1], &ev[1])) {
int ret = -errno;
- err("TCP (spliced): index %li, ERROR on epoll_ctl(): %s",
- CONN_IDX(conn), strerror(errno));
+ err("TCP (spliced): index %u, ERROR on epoll_ctl(): %s",
+ FLOW_IDX(conn), strerror(errno));
return ret;
}
@@ -167,7 +166,7 @@ static void conn_flag_do(const struct ctx *c, struct tcp_splice_conn *conn,
conn->flags &= flag;
if (flag_index >= 0) {
- debug("TCP (spliced): index %li: %s dropped", CONN_IDX(conn),
+ debug("TCP (spliced): index %u: %s dropped", FLOW_IDX(conn),
tcp_splice_flag_str[flag_index]);
}
} else {
@@ -178,7 +177,7 @@ static void conn_flag_do(const struct ctx *c, struct tcp_splice_conn *conn,
conn->flags |= flag;
if (flag_index >= 0) {
- debug("TCP (spliced): index %li: %s", CONN_IDX(conn),
+ debug("TCP (spliced): index %u: %s", FLOW_IDX(conn),
tcp_splice_flag_str[flag_index]);
}
}
@@ -213,7 +212,7 @@ static void conn_event_do(const struct ctx *c, struct tcp_splice_conn *conn,
conn->events &= event;
if (flag_index >= 0) {
- debug("TCP (spliced): index %li, ~%s", CONN_IDX(conn),
+ debug("TCP (spliced): index %u, ~%s", FLOW_IDX(conn),
tcp_splice_event_str[flag_index]);
}
} else {
@@ -224,7 +223,7 @@ static void conn_event_do(const struct ctx *c, struct tcp_splice_conn *conn,
conn->events |= event;
if (flag_index >= 0) {
- debug("TCP (spliced): index %li, %s", CONN_IDX(conn),
+ debug("TCP (spliced): index %u, %s", FLOW_IDX(conn),
tcp_splice_event_str[flag_index]);
}
}
@@ -282,7 +281,7 @@ void tcp_splice_destroy(struct ctx *c, union flow *flow)
conn->events = SPLICE_CLOSED;
conn->flags = 0;
- debug("TCP (spliced): index %li, CLOSED", CONN_IDX(conn));
+ debug("TCP (spliced): index %u, CLOSED", FLOW_IDX(conn));
tcp_table_compact(c, flow);
}
--
@@ -76,8 +76,7 @@ static int splice_pipe_pool [TCP_SPLICE_PIPE_POOL_SIZE][2];
#define CONN_V6(x) (x->flags & SPLICE_V6)
#define CONN_V4(x) (!CONN_V6(x))
#define CONN_HAS(conn, set) ((conn->events & (set)) == (set))
-#define CONN(idx) (&tc[(idx)].splice)
-#define CONN_IDX(conn) ((union flow *)(conn) - flowtab)
+#define CONN(idx) (&FLOW(idx)->tcp_splice)
/* Display strings for connection events */
static const char *tcp_splice_event_str[] __attribute((__unused__)) = {
@@ -129,8 +128,8 @@ static int tcp_splice_epoll_ctl(const struct ctx *c,
{
int m = conn->in_epoll ? EPOLL_CTL_MOD : EPOLL_CTL_ADD;
union epoll_ref ref[SIDES] = {
- { .type = EPOLL_TYPE_TCP, .fd = conn->s[0], .tcp.index = CONN_IDX(conn) },
- { .type = EPOLL_TYPE_TCP, .fd = conn->s[1], .tcp.index = CONN_IDX(conn) }
+ { .type = EPOLL_TYPE_TCP, .fd = conn->s[0], .tcp.index = FLOW_IDX(conn) },
+ { .type = EPOLL_TYPE_TCP, .fd = conn->s[1], .tcp.index = FLOW_IDX(conn) }
};
struct epoll_event ev[SIDES] = { { .data.u64 = ref[0].u64 },
{ .data.u64 = ref[1].u64 } };
@@ -140,8 +139,8 @@ static int tcp_splice_epoll_ctl(const struct ctx *c,
if (epoll_ctl(c->epollfd, m, conn->s[0], &ev[0]) ||
epoll_ctl(c->epollfd, m, conn->s[1], &ev[1])) {
int ret = -errno;
- err("TCP (spliced): index %li, ERROR on epoll_ctl(): %s",
- CONN_IDX(conn), strerror(errno));
+ err("TCP (spliced): index %u, ERROR on epoll_ctl(): %s",
+ FLOW_IDX(conn), strerror(errno));
return ret;
}
@@ -167,7 +166,7 @@ static void conn_flag_do(const struct ctx *c, struct tcp_splice_conn *conn,
conn->flags &= flag;
if (flag_index >= 0) {
- debug("TCP (spliced): index %li: %s dropped", CONN_IDX(conn),
+ debug("TCP (spliced): index %u: %s dropped", FLOW_IDX(conn),
tcp_splice_flag_str[flag_index]);
}
} else {
@@ -178,7 +177,7 @@ static void conn_flag_do(const struct ctx *c, struct tcp_splice_conn *conn,
conn->flags |= flag;
if (flag_index >= 0) {
- debug("TCP (spliced): index %li: %s", CONN_IDX(conn),
+ debug("TCP (spliced): index %u: %s", FLOW_IDX(conn),
tcp_splice_flag_str[flag_index]);
}
}
@@ -213,7 +212,7 @@ static void conn_event_do(const struct ctx *c, struct tcp_splice_conn *conn,
conn->events &= event;
if (flag_index >= 0) {
- debug("TCP (spliced): index %li, ~%s", CONN_IDX(conn),
+ debug("TCP (spliced): index %u, ~%s", FLOW_IDX(conn),
tcp_splice_event_str[flag_index]);
}
} else {
@@ -224,7 +223,7 @@ static void conn_event_do(const struct ctx *c, struct tcp_splice_conn *conn,
conn->events |= event;
if (flag_index >= 0) {
- debug("TCP (spliced): index %li, %s", CONN_IDX(conn),
+ debug("TCP (spliced): index %u, %s", FLOW_IDX(conn),
tcp_splice_event_str[flag_index]);
}
}
@@ -282,7 +281,7 @@ void tcp_splice_destroy(struct ctx *c, union flow *flow)
conn->events = SPLICE_CLOSED;
conn->flags = 0;
- debug("TCP (spliced): index %li, CLOSED", CONN_IDX(conn));
+ debug("TCP (spliced): index %u, CLOSED", FLOW_IDX(conn));
tcp_table_compact(c, flow);
}
--
2.43.0
^ permalink raw reply related [flat|nested] 18+ messages in thread
* [PATCH v2 04/11] util: MAX_FROM_BITS() should be unsigned
2023-11-26 23:33 [PATCH v2 00/11] Introduce unified flow table, first steps David Gibson
` (2 preceding siblings ...)
2023-11-26 23:33 ` [PATCH v2 03/11] flow, tcp: Consolidate flow pointer<->index helpers David Gibson
@ 2023-11-26 23:33 ` David Gibson
2023-11-26 23:33 ` [PATCH v2 05/11] flow: Make unified version of flow table compaction David Gibson
` (6 subsequent siblings)
10 siblings, 0 replies; 18+ messages in thread
From: David Gibson @ 2023-11-26 23:33 UTC (permalink / raw)
To: Stefano Brivio, passt-dev; +Cc: David Gibson
MAX_FROM_BITS() computes the maximum value representable in a number of
bits. The expression for that is an unsigned value, but we explicitly cast
it to a signed int. It looks like this is because one of the main users is
for FD_REF_MAX, which is used to bound fd values, typically stored as a
signed int.
The value MAX_FROM_BITS() is calculating is naturally non-negative, though,
so it makes more sense for it to be unsigned, and to move the case to the
definition of FD_REF_MAX.
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
---
passt.h | 2 +-
util.h | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/passt.h b/passt.h
index 3f5dfb9..0fce637 100644
--- a/passt.h
+++ b/passt.h
@@ -87,7 +87,7 @@ union epoll_ref {
struct {
enum epoll_type type:8;
#define FD_REF_BITS 24
-#define FD_REF_MAX MAX_FROM_BITS(FD_REF_BITS)
+#define FD_REF_MAX ((int)MAX_FROM_BITS(FD_REF_BITS))
int32_t fd:FD_REF_BITS;
union {
union tcp_epoll_ref tcp;
diff --git a/util.h b/util.h
index 78a8fb2..b1106e8 100644
--- a/util.h
+++ b/util.h
@@ -42,7 +42,7 @@
#define ROUND_DOWN(x, y) ((x) & ~((y) - 1))
#define ROUND_UP(x, y) (((x) + (y) - 1) & ~((y) - 1))
-#define MAX_FROM_BITS(n) ((int)((1U << (n)) - 1))
+#define MAX_FROM_BITS(n) (((1U << (n)) - 1))
#define BIT(n) (1UL << (n))
#define BITMAP_BIT(n) (BIT((n) % (sizeof(long) * 8)))
--
@@ -42,7 +42,7 @@
#define ROUND_DOWN(x, y) ((x) & ~((y) - 1))
#define ROUND_UP(x, y) (((x) + (y) - 1) & ~((y) - 1))
-#define MAX_FROM_BITS(n) ((int)((1U << (n)) - 1))
+#define MAX_FROM_BITS(n) (((1U << (n)) - 1))
#define BIT(n) (1UL << (n))
#define BITMAP_BIT(n) (BIT((n) % (sizeof(long) * 8)))
--
2.43.0
^ permalink raw reply related [flat|nested] 18+ messages in thread
* [PATCH v2 05/11] flow: Make unified version of flow table compaction
2023-11-26 23:33 [PATCH v2 00/11] Introduce unified flow table, first steps David Gibson
` (3 preceding siblings ...)
2023-11-26 23:33 ` [PATCH v2 04/11] util: MAX_FROM_BITS() should be unsigned David Gibson
@ 2023-11-26 23:33 ` David Gibson
2023-11-26 23:33 ` [PATCH v2 06/11] flow, tcp: Add logging helpers for connection related messages David Gibson
` (5 subsequent siblings)
10 siblings, 0 replies; 18+ messages in thread
From: David Gibson @ 2023-11-26 23:33 UTC (permalink / raw)
To: Stefano Brivio, passt-dev; +Cc: David Gibson
tcp_table_compact() will move entries in the connection/flow table to keep
it compact when other entries are removed. The moved entries need not have
the same type as the flow removed, so it needs to be able to handle moving
any type of flow. Therefore, move it to flow.c rather than being
purportedly TCP specific.
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
---
flow.c | 39 +++++++++++++++++++++++++++++++++++++++
flow.h | 2 ++
tcp.c | 46 ++++------------------------------------------
tcp_conn.h | 3 ++-
tcp_splice.c | 2 +-
5 files changed, 48 insertions(+), 44 deletions(-)
diff --git a/flow.c b/flow.c
index a9bb8f5..0fff119 100644
--- a/flow.c
+++ b/flow.c
@@ -25,3 +25,42 @@ const char *flow_type_str[] = {
/* Global Flow Table */
union flow flowtab[FLOW_MAX];
+
+/**
+ * flow_table_compact() - Perform compaction on flow table
+ * @c: Execution context
+ * @hole: Pointer to recently closed flow
+ */
+void flow_table_compact(struct ctx *c, union flow *hole)
+{
+ union flow *from;
+
+ if (FLOW_IDX(hole) == --c->flow_count) {
+ debug("flow: table compaction: maximum index was %u (%p)",
+ FLOW_IDX(hole), (void *)hole);
+ memset(hole, 0, sizeof(*hole));
+ return;
+ }
+
+ from = flowtab + c->flow_count;
+ memcpy(hole, from, sizeof(*hole));
+
+ switch (from->f.type) {
+ case FLOW_TCP:
+ tcp_tap_conn_update(c, &from->tcp, &hole->tcp);
+ break;
+ case FLOW_TCP_SPLICE:
+ tcp_splice_conn_update(c, &hole->tcp_splice);
+ break;
+ default:
+ die("Unexpected %s in tcp_table_compact()",
+ FLOW_TYPE(&from->f));
+ }
+
+ debug("flow: table compaction (%s): old index %u, new index %u, "
+ "from: %p, to: %p",
+ FLOW_TYPE(&from->f), FLOW_IDX(from), FLOW_IDX(hole),
+ (void *)from, (void *)hole);
+
+ memset(from, 0, sizeof(*from));
+}
diff --git a/flow.h b/flow.h
index 57289a8..9f49195 100644
--- a/flow.h
+++ b/flow.h
@@ -41,4 +41,6 @@ struct flow_common {
union flow;
+void flow_table_compact(struct ctx *c, union flow *hole);
+
#endif /* FLOW_H */
diff --git a/tcp.c b/tcp.c
index 859df6f..6924dd2 100644
--- a/tcp.c
+++ b/tcp.c
@@ -1244,8 +1244,9 @@ static void tcp_hash_remove(const struct ctx *c,
* @old: Old location of tcp_tap_conn
* @new: New location of tcp_tap_conn
*/
-static void tcp_tap_conn_update(const struct ctx *c, struct tcp_tap_conn *old,
- struct tcp_tap_conn *new)
+void tcp_tap_conn_update(const struct ctx *c, struct tcp_tap_conn *old,
+ struct tcp_tap_conn *new)
+
{
struct tcp_tap_conn *entry, *prev = NULL;
int b = tcp_conn_hash(c, old);
@@ -1297,45 +1298,6 @@ static struct tcp_tap_conn *tcp_hash_lookup(const struct ctx *c,
return NULL;
}
-/**
- * tcp_table_compact() - Perform compaction on connection table
- * @c: Execution context
- * @hole: Pointer to recently closed connection
- */
-void tcp_table_compact(struct ctx *c, union flow *hole)
-{
- union flow *from;
-
- if (FLOW_IDX(hole) == --c->flow_count) {
- debug("TCP: table compaction: maximum index was %u (%p)",
- FLOW_IDX(hole), (void *)hole);
- memset(hole, 0, sizeof(*hole));
- return;
- }
-
- from = flowtab + c->flow_count;
- memcpy(hole, from, sizeof(*hole));
-
- switch (from->f.type) {
- case FLOW_TCP:
- tcp_tap_conn_update(c, &from->tcp, &hole->tcp);
- break;
- case FLOW_TCP_SPLICE:
- tcp_splice_conn_update(c, &hole->tcp_splice);
- break;
- default:
- die("Unexpected %s in tcp_table_compact()",
- FLOW_TYPE(&from->f));
- }
-
- debug("TCP: table compaction (%s): old index %u, new index %u, "
- "from: %p, to: %p",
- FLOW_TYPE(&from->f), FLOW_IDX(from), FLOW_IDX(hole),
- (void *)from, (void *)hole);
-
- memset(from, 0, sizeof(*from));
-}
-
/**
* tcp_conn_destroy() - Close sockets, trigger hash table removal and compaction
* @c: Execution context
@@ -1350,7 +1312,7 @@ static void tcp_conn_destroy(struct ctx *c, union flow *flow)
close(conn->timer);
tcp_hash_remove(c, conn);
- tcp_table_compact(c, flow);
+ flow_table_compact(c, flow);
}
static void tcp_rst_do(struct ctx *c, struct tcp_tap_conn *conn);
diff --git a/tcp_conn.h b/tcp_conn.h
index 5a9376e..3900305 100644
--- a/tcp_conn.h
+++ b/tcp_conn.h
@@ -157,8 +157,9 @@ struct tcp_splice_conn {
extern int init_sock_pool4 [TCP_SOCK_POOL_SIZE];
extern int init_sock_pool6 [TCP_SOCK_POOL_SIZE];
+void tcp_tap_conn_update(const struct ctx *c, struct tcp_tap_conn *old,
+ struct tcp_tap_conn *new);
void tcp_splice_conn_update(const struct ctx *c, struct tcp_splice_conn *new);
-void tcp_table_compact(struct ctx *c, union flow *hole);
void tcp_splice_destroy(struct ctx *c, union flow *flow);
void tcp_splice_timer(struct ctx *c, union flow *flow);
int tcp_conn_pool_sock(int pool[]);
diff --git a/tcp_splice.c b/tcp_splice.c
index 3955417..212fe16 100644
--- a/tcp_splice.c
+++ b/tcp_splice.c
@@ -283,7 +283,7 @@ void tcp_splice_destroy(struct ctx *c, union flow *flow)
conn->flags = 0;
debug("TCP (spliced): index %u, CLOSED", FLOW_IDX(conn));
- tcp_table_compact(c, flow);
+ flow_table_compact(c, flow);
}
/**
--
@@ -283,7 +283,7 @@ void tcp_splice_destroy(struct ctx *c, union flow *flow)
conn->flags = 0;
debug("TCP (spliced): index %u, CLOSED", FLOW_IDX(conn));
- tcp_table_compact(c, flow);
+ flow_table_compact(c, flow);
}
/**
--
2.43.0
^ permalink raw reply related [flat|nested] 18+ messages in thread
* [PATCH v2 06/11] flow, tcp: Add logging helpers for connection related messages
2023-11-26 23:33 [PATCH v2 00/11] Introduce unified flow table, first steps David Gibson
` (4 preceding siblings ...)
2023-11-26 23:33 ` [PATCH v2 05/11] flow: Make unified version of flow table compaction David Gibson
@ 2023-11-26 23:33 ` David Gibson
2023-11-26 23:33 ` [PATCH v2 07/11] flow: Introduce 'sidx' type to represent one side of one flow David Gibson
` (4 subsequent siblings)
10 siblings, 0 replies; 18+ messages in thread
From: David Gibson @ 2023-11-26 23:33 UTC (permalink / raw)
To: Stefano Brivio, passt-dev; +Cc: David Gibson
Most of the messages logged by the TCP code (be they errors, debug or
trace messages) are related to a specific connection / flow. We're fairly
consistent about prefixing these with the type of connection and the
connection / flow index. However there are a few places where we put the
index later in the message or omit it entirely. The template with the
prefix is also a little bulky to carry around for every message,
particularly for spliced connections.
To help keep this consistent, introduce some helpers to log messages
linked to a specific flow. It takes the flow as a parameter and adds a
uniform prefix to each message. This makes things slightly neater now, but
more importantly will help keep formatting consistent as we add more things
to the flow table.
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
---
flow.c | 18 ++++++++++++
flow.h | 14 +++++++++
tcp.c | 81 ++++++++++++++++++++++++----------------------------
tcp_splice.c | 61 +++++++++++++++++----------------------
4 files changed, 96 insertions(+), 78 deletions(-)
diff --git a/flow.c b/flow.c
index 0fff119..4d479c2 100644
--- a/flow.c
+++ b/flow.c
@@ -64,3 +64,21 @@ void flow_table_compact(struct ctx *c, union flow *hole)
memset(from, 0, sizeof(*from));
}
+
+/** flow_log_ - Log flow-related message
+ * @f: flow the message is related to
+ * @pri: Log priority
+ * @fmt: Format string
+ * @...: printf-arguments
+ */
+void flow_log_(const struct flow_common *f, int pri, const char *fmt, ...)
+{
+ char msg[BUFSIZ];
+ va_list args;
+
+ va_start(args, fmt);
+ (void)vsnprintf(msg, sizeof(msg), fmt, args);
+ va_end(args);
+
+ logmsg(pri, "Flow %u (%s): %s", flow_idx(f), FLOW_TYPE(f), msg);
+}
diff --git a/flow.h b/flow.h
index 9f49195..b6da516 100644
--- a/flow.h
+++ b/flow.h
@@ -43,4 +43,18 @@ union flow;
void flow_table_compact(struct ctx *c, union flow *hole);
+void flow_log_(const struct flow_common *f, int pri, const char *fmt, ...)
+ __attribute__((format(printf, 3, 4)));
+
+#define flow_log(f_, pri, ...) flow_log_(&(f_)->f, (pri), __VA_ARGS__)
+
+#define flow_dbg(f, ...) flow_log((f), LOG_DEBUG, __VA_ARGS__)
+#define flow_err(f, ...) flow_log((f), LOG_ERR, __VA_ARGS__)
+
+#define flow_trace(f, ...) \
+ do { \
+ if (log_trace) \
+ flow_dbg((f), __VA_ARGS__); \
+ } while (0)
+
#endif /* FLOW_H */
diff --git a/tcp.c b/tcp.c
index 6924dd2..f427047 100644
--- a/tcp.c
+++ b/tcp.c
@@ -624,7 +624,7 @@ static void conn_flag_do(const struct ctx *c, struct tcp_tap_conn *conn,
unsigned long flag);
#define conn_flag(c, conn, flag) \
do { \
- trace("TCP: flag at %s:%i", __func__, __LINE__); \
+ flow_trace(conn, "flag at %s:%i", __func__, __LINE__); \
conn_flag_do(c, conn, flag); \
} while (0)
@@ -695,7 +695,8 @@ static void tcp_timer_ctl(const struct ctx *c, struct tcp_tap_conn *conn)
fd = timerfd_create(CLOCK_MONOTONIC, 0);
if (fd == -1 || fd > FD_REF_MAX) {
- debug("TCP: failed to get timer: %s", strerror(errno));
+ flow_dbg(conn, "failed to get timer: %s",
+ strerror(errno));
if (fd > -1)
close(fd);
conn->timer = -1;
@@ -704,7 +705,8 @@ static void tcp_timer_ctl(const struct ctx *c, struct tcp_tap_conn *conn)
conn->timer = fd;
if (epoll_ctl(c->epollfd, EPOLL_CTL_ADD, conn->timer, &ev)) {
- debug("TCP: failed to add timer: %s", strerror(errno));
+ flow_dbg(conn, "failed to add timer: %s",
+ strerror(errno));
close(conn->timer);
conn->timer = -1;
return;
@@ -724,8 +726,8 @@ static void tcp_timer_ctl(const struct ctx *c, struct tcp_tap_conn *conn)
it.it_value.tv_sec = ACT_TIMEOUT;
}
- debug("TCP: index %u, timer expires in %lu.%03lus", FLOW_IDX(conn),
- it.it_value.tv_sec, it.it_value.tv_nsec / 1000 / 1000);
+ flow_dbg(conn, "timer expires in %lu.%03lus", it.it_value.tv_sec,
+ it.it_value.tv_nsec / 1000 / 1000);
timerfd_settime(conn->timer, 0, &it, NULL);
}
@@ -746,10 +748,8 @@ static void conn_flag_do(const struct ctx *c, struct tcp_tap_conn *conn,
return;
conn->flags &= flag;
- if (flag_index >= 0) {
- debug("TCP: index %u: %s dropped", FLOW_IDX(conn),
- tcp_flag_str[flag_index]);
- }
+ if (flag_index >= 0)
+ flow_dbg(conn, "%s dropped", tcp_flag_str[flag_index]);
} else {
int flag_index = fls(flag);
@@ -767,10 +767,8 @@ static void conn_flag_do(const struct ctx *c, struct tcp_tap_conn *conn,
}
conn->flags |= flag;
- if (flag_index >= 0) {
- debug("TCP: index %u: %s", FLOW_IDX(conn),
- tcp_flag_str[flag_index]);
- }
+ if (flag_index >= 0)
+ flow_dbg(conn, "%s", tcp_flag_str[flag_index]);
}
if (flag == STALLED || flag == ~STALLED)
@@ -817,15 +815,14 @@ static void conn_event_do(const struct ctx *c, struct tcp_tap_conn *conn,
if (conn->flags & ACTIVE_CLOSE)
new += 5;
- if (prev != new) {
- debug("TCP: index %u, %s: %s -> %s", FLOW_IDX(conn),
- num == -1 ? "CLOSED" : tcp_event_str[num],
- prev == -1 ? "CLOSED" : tcp_state_str[prev],
- (new == -1 || num == -1) ? "CLOSED" : tcp_state_str[new]);
- } else {
- debug("TCP: index %u, %s", FLOW_IDX(conn),
- num == -1 ? "CLOSED" : tcp_event_str[num]);
- }
+ if (prev != new)
+ flow_dbg(conn, "%s: %s -> %s",
+ num == -1 ? "CLOSED" : tcp_event_str[num],
+ prev == -1 ? "CLOSED" : tcp_state_str[prev],
+ (new == -1 || num == -1) ? "CLOSED" : tcp_state_str[new]);
+ else
+ flow_dbg(conn, "%s",
+ num == -1 ? "CLOSED" : tcp_event_str[num]);
if ((event == TAP_FIN_RCVD) && !(conn->events & SOCK_FIN_RCVD))
conn_flag(c, conn, ACTIVE_CLOSE);
@@ -838,7 +835,7 @@ static void conn_event_do(const struct ctx *c, struct tcp_tap_conn *conn,
#define conn_event(c, conn, event) \
do { \
- trace("TCP: event at %s:%i", __func__, __LINE__); \
+ flow_trace(conn, "event at %s:%i", __func__, __LINE__); \
conn_event_do(c, conn, event); \
} while (0)
@@ -1206,9 +1203,8 @@ static void tcp_hash_insert(const struct ctx *c, struct tcp_tap_conn *conn)
conn->next_index = tc_hash[b] ? FLOW_IDX(tc_hash[b]) : -1U;
tc_hash[b] = conn;
- debug("TCP: hash table insert: index %u, sock %i, bucket: %i, next: "
- "%p", FLOW_IDX(conn), conn->sock, b,
- (void *)conn_at_idx(conn->next_index));
+ flow_dbg(conn, "hash table insert: sock %i, bucket: %i, next: %p",
+ conn->sock, b, (void *)conn_at_idx(conn->next_index));
}
/**
@@ -1233,9 +1229,9 @@ static void tcp_hash_remove(const struct ctx *c,
}
}
- debug("TCP: hash table remove: index %u, sock %i, bucket: %i, new: %p",
- FLOW_IDX(conn), conn->sock, b,
- (void *)(prev ? conn_at_idx(prev->next_index) : tc_hash[b]));
+ flow_dbg(conn, "hash table remove: sock %i, bucket: %i, new: %p",
+ conn->sock, b,
+ (void *)(prev ? conn_at_idx(prev->next_index) : tc_hash[b]));
}
/**
@@ -1318,8 +1314,7 @@ static void tcp_conn_destroy(struct ctx *c, union flow *flow)
static void tcp_rst_do(struct ctx *c, struct tcp_tap_conn *conn);
#define tcp_rst(c, conn) \
do { \
- debug("TCP: index %u, reset at %s:%i", FLOW_IDX(conn), \
- __func__, __LINE__); \
+ flow_dbg((conn), "TCP reset at %s:%i", __func__, __LINE__); \
tcp_rst_do(c, conn); \
} while (0)
@@ -1998,7 +1993,7 @@ static void tcp_conn_from_tap(struct ctx *c,
mss = tcp_conn_tap_mss(conn, opts, optlen);
if (setsockopt(s, SOL_TCP, TCP_MAXSEG, &mss, sizeof(mss)))
- trace("TCP: failed to set TCP_MAXSEG on socket %i", s);
+ flow_trace(conn, "failed to set TCP_MAXSEG on socket %i", s);
MSS_SET(conn, mss);
tcp_get_tap_ws(conn, opts, optlen);
@@ -2159,8 +2154,8 @@ static int tcp_data_from_sock(struct ctx *c, struct tcp_tap_conn *conn)
if (SEQ_LT(already_sent, 0)) {
/* RFC 761, section 2.1. */
- trace("TCP: ACK sequence gap: ACK for %u, sent: %u",
- conn->seq_ack_from_tap, conn->seq_to_tap);
+ flow_trace(conn, "ACK sequence gap: ACK for %u, sent: %u",
+ conn->seq_ack_from_tap, conn->seq_to_tap);
conn->seq_to_tap = conn->seq_ack_from_tap;
already_sent = 0;
}
@@ -2392,8 +2387,9 @@ static int tcp_data_from_tap(struct ctx *c, struct tcp_tap_conn *conn,
tcp_tap_window_update(conn, max_ack_seq_wnd);
if (retr) {
- trace("TCP: fast re-transmit, ACK: %u, previous sequence: %u",
- max_ack_seq, conn->seq_to_tap);
+ flow_trace(conn,
+ "fast re-transmit, ACK: %u, previous sequence: %u",
+ max_ack_seq, conn->seq_to_tap);
conn->seq_ack_from_tap = max_ack_seq;
conn->seq_to_tap = max_ack_seq;
tcp_data_from_sock(c, conn);
@@ -2542,7 +2538,7 @@ int tcp_tap_handler(struct ctx *c, uint8_t pif, int af,
return 1;
}
- trace("TCP: packet length %lu from tap for index %u", len, FLOW_IDX(conn));
+ flow_trace(conn, "packet length %lu from tap", len);
if (th->rst) {
conn_event(c, conn, CLOSED);
@@ -2782,17 +2778,16 @@ void tcp_timer_handler(struct ctx *c, union epoll_ref ref)
tcp_timer_ctl(c, conn);
} else if (conn->flags & ACK_FROM_TAP_DUE) {
if (!(conn->events & ESTABLISHED)) {
- debug("TCP: index %u, handshake timeout", FLOW_IDX(conn));
+ flow_dbg(conn, "handshake timeout");
tcp_rst(c, conn);
} else if (CONN_HAS(conn, SOCK_FIN_SENT | TAP_FIN_ACKED)) {
- debug("TCP: index %u, FIN timeout", FLOW_IDX(conn));
+ flow_dbg(conn, "FIN timeout");
tcp_rst(c, conn);
} else if (conn->retrans == TCP_MAX_RETRANS) {
- debug("TCP: index %u, retransmissions count exceeded",
- FLOW_IDX(conn));
+ flow_dbg(conn, "retransmissions count exceeded");
tcp_rst(c, conn);
} else {
- debug("TCP: index %u, ACK timeout, retry", FLOW_IDX(conn));
+ flow_dbg(conn, "ACK timeout, retry");
conn->retrans++;
conn->seq_to_tap = conn->seq_ack_from_tap;
tcp_data_from_sock(c, conn);
@@ -2810,7 +2805,7 @@ void tcp_timer_handler(struct ctx *c, union epoll_ref ref)
*/
timerfd_settime(conn->timer, 0, &new, &old);
if (old.it_value.tv_sec == ACT_TIMEOUT) {
- debug("TCP: index %u, activity timeout", FLOW_IDX(conn));
+ flow_dbg(conn, "activity timeout");
tcp_rst(c, conn);
}
}
diff --git a/tcp_splice.c b/tcp_splice.c
index 212fe16..28b91e1 100644
--- a/tcp_splice.c
+++ b/tcp_splice.c
@@ -139,8 +139,7 @@ static int tcp_splice_epoll_ctl(const struct ctx *c,
if (epoll_ctl(c->epollfd, m, conn->s[0], &ev[0]) ||
epoll_ctl(c->epollfd, m, conn->s[1], &ev[1])) {
int ret = -errno;
- err("TCP (spliced): index %u, ERROR on epoll_ctl(): %s",
- FLOW_IDX(conn), strerror(errno));
+ flow_err(conn, "ERROR on epoll_ctl(): %s", strerror(errno));
return ret;
}
@@ -165,10 +164,9 @@ static void conn_flag_do(const struct ctx *c, struct tcp_splice_conn *conn,
return;
conn->flags &= flag;
- if (flag_index >= 0) {
- debug("TCP (spliced): index %u: %s dropped", FLOW_IDX(conn),
- tcp_splice_flag_str[flag_index]);
- }
+ if (flag_index >= 0)
+ flow_dbg(conn, "%s dropped",
+ tcp_splice_flag_str[flag_index]);
} else {
int flag_index = fls(flag);
@@ -176,10 +174,8 @@ static void conn_flag_do(const struct ctx *c, struct tcp_splice_conn *conn,
return;
conn->flags |= flag;
- if (flag_index >= 0) {
- debug("TCP (spliced): index %u: %s", FLOW_IDX(conn),
- tcp_splice_flag_str[flag_index]);
- }
+ if (flag_index >= 0)
+ flow_dbg(conn, "%s", tcp_splice_flag_str[flag_index]);
}
if (flag == CLOSING) {
@@ -190,8 +186,7 @@ static void conn_flag_do(const struct ctx *c, struct tcp_splice_conn *conn,
#define conn_flag(c, conn, flag) \
do { \
- trace("TCP (spliced): flag at %s:%i", \
- __func__, __LINE__); \
+ flow_trace(conn, "flag at %s:%i", __func__, __LINE__); \
conn_flag_do(c, conn, flag); \
} while (0)
@@ -211,10 +206,8 @@ static void conn_event_do(const struct ctx *c, struct tcp_splice_conn *conn,
return;
conn->events &= event;
- if (flag_index >= 0) {
- debug("TCP (spliced): index %u, ~%s", FLOW_IDX(conn),
- tcp_splice_event_str[flag_index]);
- }
+ if (flag_index >= 0)
+ flow_dbg(conn, "~%s", tcp_splice_event_str[flag_index]);
} else {
int flag_index = fls(event);
@@ -222,10 +215,8 @@ static void conn_event_do(const struct ctx *c, struct tcp_splice_conn *conn,
return;
conn->events |= event;
- if (flag_index >= 0) {
- debug("TCP (spliced): index %u, %s", FLOW_IDX(conn),
- tcp_splice_event_str[flag_index]);
- }
+ if (flag_index >= 0)
+ flow_dbg(conn, "%s", tcp_splice_event_str[flag_index]);
}
if (tcp_splice_epoll_ctl(c, conn))
@@ -234,8 +225,7 @@ static void conn_event_do(const struct ctx *c, struct tcp_splice_conn *conn,
#define conn_event(c, conn, event) \
do { \
- trace("TCP (spliced): event at %s:%i", \
- __func__, __LINE__); \
+ flow_trace(conn, "event at %s:%i",__func__, __LINE__); \
conn_event_do(c, conn, event); \
} while (0)
@@ -281,7 +271,7 @@ void tcp_splice_destroy(struct ctx *c, union flow *flow)
conn->events = SPLICE_CLOSED;
conn->flags = 0;
- debug("TCP (spliced): index %u, CLOSED", FLOW_IDX(conn));
+ flow_dbg(conn, "CLOSED");
flow_table_compact(c, flow);
}
@@ -314,16 +304,17 @@ static int tcp_splice_connect_finish(const struct ctx *c,
if (conn->pipe[side][0] < 0) {
if (pipe2(conn->pipe[side], O_NONBLOCK | O_CLOEXEC)) {
- err("TCP (spliced): cannot create %d->%d pipe: %s",
- side, !side, strerror(errno));
+ flow_err(conn, "cannot create %d->%d pipe: %s",
+ side, !side, strerror(errno));
conn_flag(c, conn, CLOSING);
return -EIO;
}
if (fcntl(conn->pipe[side][0], F_SETPIPE_SZ,
c->tcp.pipe_size)) {
- trace("TCP (spliced): cannot set %d->%d pipe size to %lu",
- side, !side, c->tcp.pipe_size);
+ flow_trace(conn,
+ "cannot set %d->%d pipe size to %lu",
+ side, !side, c->tcp.pipe_size);
}
}
}
@@ -363,8 +354,8 @@ static int tcp_splice_connect(const struct ctx *c, struct tcp_splice_conn *conn,
if (setsockopt(conn->s[1], SOL_TCP, TCP_QUICKACK,
&((int){ 1 }), sizeof(int))) {
- trace("TCP (spliced): failed to set TCP_QUICKACK on socket %i",
- conn->s[1]);
+ flow_trace(conn, "failed to set TCP_QUICKACK on socket %i",
+ conn->s[1]);
}
if (CONN_V6(conn)) {
@@ -475,7 +466,7 @@ bool tcp_splice_conn_from_sock(const struct ctx *c,
}
if (setsockopt(s, SOL_TCP, TCP_QUICKACK, &((int){ 1 }), sizeof(int)))
- trace("TCP (spliced): failed to set TCP_QUICKACK on %i", s);
+ flow_trace(conn, "failed to set TCP_QUICKACK on %i", s);
conn->f.type = FLOW_TCP_SPLICE;
conn->s[0] = s;
@@ -555,7 +546,7 @@ retry:
readlen = splice(conn->s[fromside], NULL,
conn->pipe[fromside][1], NULL, c->tcp.pipe_size,
SPLICE_F_MOVE | SPLICE_F_NONBLOCK);
- trace("TCP (spliced): %li from read-side call", readlen);
+ flow_trace(conn, "%li from read-side call", readlen);
if (readlen < 0) {
if (errno == EINTR)
goto retry;
@@ -581,8 +572,8 @@ eintr:
written = splice(conn->pipe[fromside][0], NULL,
conn->s[!fromside], NULL, to_write,
SPLICE_F_MOVE | more | SPLICE_F_NONBLOCK);
- trace("TCP (spliced): %li from write-side call (passed %lu)",
- written, to_write);
+ flow_trace(conn, "%li from write-side call (passed %lu)",
+ written, to_write);
/* Most common case: skip updating counters. */
if (readlen > 0 && readlen == written) {
@@ -794,8 +785,8 @@ void tcp_splice_timer(struct ctx *c, union flow *flow)
if ((conn->flags & set) && !(conn->flags & act)) {
if (setsockopt(conn->s[side], SOL_SOCKET, SO_RCVLOWAT,
&((int){ 1 }), sizeof(int))) {
- trace("TCP (spliced): can't set SO_RCVLOWAT on "
- "%i", conn->s[side]);
+ flow_trace(conn, "can't set SO_RCVLOWAT on %d",
+ conn->s[side]);
}
conn_flag(c, conn, ~set);
}
--
@@ -139,8 +139,7 @@ static int tcp_splice_epoll_ctl(const struct ctx *c,
if (epoll_ctl(c->epollfd, m, conn->s[0], &ev[0]) ||
epoll_ctl(c->epollfd, m, conn->s[1], &ev[1])) {
int ret = -errno;
- err("TCP (spliced): index %u, ERROR on epoll_ctl(): %s",
- FLOW_IDX(conn), strerror(errno));
+ flow_err(conn, "ERROR on epoll_ctl(): %s", strerror(errno));
return ret;
}
@@ -165,10 +164,9 @@ static void conn_flag_do(const struct ctx *c, struct tcp_splice_conn *conn,
return;
conn->flags &= flag;
- if (flag_index >= 0) {
- debug("TCP (spliced): index %u: %s dropped", FLOW_IDX(conn),
- tcp_splice_flag_str[flag_index]);
- }
+ if (flag_index >= 0)
+ flow_dbg(conn, "%s dropped",
+ tcp_splice_flag_str[flag_index]);
} else {
int flag_index = fls(flag);
@@ -176,10 +174,8 @@ static void conn_flag_do(const struct ctx *c, struct tcp_splice_conn *conn,
return;
conn->flags |= flag;
- if (flag_index >= 0) {
- debug("TCP (spliced): index %u: %s", FLOW_IDX(conn),
- tcp_splice_flag_str[flag_index]);
- }
+ if (flag_index >= 0)
+ flow_dbg(conn, "%s", tcp_splice_flag_str[flag_index]);
}
if (flag == CLOSING) {
@@ -190,8 +186,7 @@ static void conn_flag_do(const struct ctx *c, struct tcp_splice_conn *conn,
#define conn_flag(c, conn, flag) \
do { \
- trace("TCP (spliced): flag at %s:%i", \
- __func__, __LINE__); \
+ flow_trace(conn, "flag at %s:%i", __func__, __LINE__); \
conn_flag_do(c, conn, flag); \
} while (0)
@@ -211,10 +206,8 @@ static void conn_event_do(const struct ctx *c, struct tcp_splice_conn *conn,
return;
conn->events &= event;
- if (flag_index >= 0) {
- debug("TCP (spliced): index %u, ~%s", FLOW_IDX(conn),
- tcp_splice_event_str[flag_index]);
- }
+ if (flag_index >= 0)
+ flow_dbg(conn, "~%s", tcp_splice_event_str[flag_index]);
} else {
int flag_index = fls(event);
@@ -222,10 +215,8 @@ static void conn_event_do(const struct ctx *c, struct tcp_splice_conn *conn,
return;
conn->events |= event;
- if (flag_index >= 0) {
- debug("TCP (spliced): index %u, %s", FLOW_IDX(conn),
- tcp_splice_event_str[flag_index]);
- }
+ if (flag_index >= 0)
+ flow_dbg(conn, "%s", tcp_splice_event_str[flag_index]);
}
if (tcp_splice_epoll_ctl(c, conn))
@@ -234,8 +225,7 @@ static void conn_event_do(const struct ctx *c, struct tcp_splice_conn *conn,
#define conn_event(c, conn, event) \
do { \
- trace("TCP (spliced): event at %s:%i", \
- __func__, __LINE__); \
+ flow_trace(conn, "event at %s:%i",__func__, __LINE__); \
conn_event_do(c, conn, event); \
} while (0)
@@ -281,7 +271,7 @@ void tcp_splice_destroy(struct ctx *c, union flow *flow)
conn->events = SPLICE_CLOSED;
conn->flags = 0;
- debug("TCP (spliced): index %u, CLOSED", FLOW_IDX(conn));
+ flow_dbg(conn, "CLOSED");
flow_table_compact(c, flow);
}
@@ -314,16 +304,17 @@ static int tcp_splice_connect_finish(const struct ctx *c,
if (conn->pipe[side][0] < 0) {
if (pipe2(conn->pipe[side], O_NONBLOCK | O_CLOEXEC)) {
- err("TCP (spliced): cannot create %d->%d pipe: %s",
- side, !side, strerror(errno));
+ flow_err(conn, "cannot create %d->%d pipe: %s",
+ side, !side, strerror(errno));
conn_flag(c, conn, CLOSING);
return -EIO;
}
if (fcntl(conn->pipe[side][0], F_SETPIPE_SZ,
c->tcp.pipe_size)) {
- trace("TCP (spliced): cannot set %d->%d pipe size to %lu",
- side, !side, c->tcp.pipe_size);
+ flow_trace(conn,
+ "cannot set %d->%d pipe size to %lu",
+ side, !side, c->tcp.pipe_size);
}
}
}
@@ -363,8 +354,8 @@ static int tcp_splice_connect(const struct ctx *c, struct tcp_splice_conn *conn,
if (setsockopt(conn->s[1], SOL_TCP, TCP_QUICKACK,
&((int){ 1 }), sizeof(int))) {
- trace("TCP (spliced): failed to set TCP_QUICKACK on socket %i",
- conn->s[1]);
+ flow_trace(conn, "failed to set TCP_QUICKACK on socket %i",
+ conn->s[1]);
}
if (CONN_V6(conn)) {
@@ -475,7 +466,7 @@ bool tcp_splice_conn_from_sock(const struct ctx *c,
}
if (setsockopt(s, SOL_TCP, TCP_QUICKACK, &((int){ 1 }), sizeof(int)))
- trace("TCP (spliced): failed to set TCP_QUICKACK on %i", s);
+ flow_trace(conn, "failed to set TCP_QUICKACK on %i", s);
conn->f.type = FLOW_TCP_SPLICE;
conn->s[0] = s;
@@ -555,7 +546,7 @@ retry:
readlen = splice(conn->s[fromside], NULL,
conn->pipe[fromside][1], NULL, c->tcp.pipe_size,
SPLICE_F_MOVE | SPLICE_F_NONBLOCK);
- trace("TCP (spliced): %li from read-side call", readlen);
+ flow_trace(conn, "%li from read-side call", readlen);
if (readlen < 0) {
if (errno == EINTR)
goto retry;
@@ -581,8 +572,8 @@ eintr:
written = splice(conn->pipe[fromside][0], NULL,
conn->s[!fromside], NULL, to_write,
SPLICE_F_MOVE | more | SPLICE_F_NONBLOCK);
- trace("TCP (spliced): %li from write-side call (passed %lu)",
- written, to_write);
+ flow_trace(conn, "%li from write-side call (passed %lu)",
+ written, to_write);
/* Most common case: skip updating counters. */
if (readlen > 0 && readlen == written) {
@@ -794,8 +785,8 @@ void tcp_splice_timer(struct ctx *c, union flow *flow)
if ((conn->flags & set) && !(conn->flags & act)) {
if (setsockopt(conn->s[side], SOL_SOCKET, SO_RCVLOWAT,
&((int){ 1 }), sizeof(int))) {
- trace("TCP (spliced): can't set SO_RCVLOWAT on "
- "%i", conn->s[side]);
+ flow_trace(conn, "can't set SO_RCVLOWAT on %d",
+ conn->s[side]);
}
conn_flag(c, conn, ~set);
}
--
2.43.0
^ permalink raw reply related [flat|nested] 18+ messages in thread
* [PATCH v2 07/11] flow: Introduce 'sidx' type to represent one side of one flow
2023-11-26 23:33 [PATCH v2 00/11] Introduce unified flow table, first steps David Gibson
` (5 preceding siblings ...)
2023-11-26 23:33 ` [PATCH v2 06/11] flow, tcp: Add logging helpers for connection related messages David Gibson
@ 2023-11-26 23:33 ` David Gibson
2023-11-29 14:32 ` Stefano Brivio
2023-11-26 23:33 ` [PATCH v2 08/11] tcp: Remove unneccessary bounds check in tcp_timer_handler() David Gibson
` (3 subsequent siblings)
10 siblings, 1 reply; 18+ messages in thread
From: David Gibson @ 2023-11-26 23:33 UTC (permalink / raw)
To: Stefano Brivio, passt-dev; +Cc: David Gibson
In a number of places, we use indices into the flow table to identify a
specific flow. We also have cases where we need to identify a particular
side of a particular flow, and we expect those to become more common as
we generalise the flow table to cover more things.
To assist with that, introduces flow_sidx_t, an index type which identifies
a specific side of a specific flow in the table.
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
---
flow.h | 13 +++++++++++++
flow_table.h | 36 ++++++++++++++++++++++++++++++++++++
2 files changed, 49 insertions(+)
diff --git a/flow.h b/flow.h
index b6da516..3c90bbd 100644
--- a/flow.h
+++ b/flow.h
@@ -39,6 +39,19 @@ struct flow_common {
#define FLOW_TABLE_PRESSURE 30 /* % of FLOW_MAX */
#define FLOW_FILE_PRESSURE 30 /* % of c->nofile */
+/**
+ * struct flow_sidx - ID for one side of a specific flow
+ * @side: Side referenced (0 or 1)
+ * @flow: Index of flow referenced
+ */
+typedef struct flow_sidx {
+ int side :1;
+ unsigned flow :FLOW_INDEX_BITS;
+} flow_sidx_t;
+static_assert(sizeof(flow_sidx_t) <= sizeof(uint32_t));
+
+#define FLOW_SIDX_NONE ((flow_sidx_t){ .flow = FLOW_MAX })
+
union flow;
void flow_table_compact(struct ctx *c, union flow *hole);
diff --git a/flow_table.h b/flow_table.h
index 5e897bd..3c68d4a 100644
--- a/flow_table.h
+++ b/flow_table.h
@@ -47,4 +47,40 @@ static inline unsigned flow_idx(const struct flow_common *f)
*/
#define FLOW(idx) (&flowtab[(idx)])
+/** flow_at_sidx - Flow entry for a given sidx
+ * @sidx: Flow & side index
+ *
+ * Return: pointer to the corresponding flow entry, or NULL
+ */
+static inline union flow *flow_at_sidx(flow_sidx_t sidx)
+{
+ if (sidx.flow >= FLOW_MAX)
+ return NULL;
+ return FLOW(sidx.flow);
+}
+
+/** flow_sidx_t - Index of one side of a flow from common structure
+ * @f: Common flow fields pointer
+ * @side: Which side to refer to (0 or 1)
+ *
+ * Return: index of @f and @side in the flow table
+ */
+static inline flow_sidx_t flow_sidx(const struct flow_common *f,
+ int side)
+{
+ ASSERT(side == !!side);
+ return (flow_sidx_t){
+ .side = side,
+ .flow = flow_idx(f),
+ };
+}
+
+/** FLOW_SIDX - Find the index of one side of a flow
+ * @f_: Flow pointer, either union flow * or protocol specific
+ * @side: Which side to index (0 or 1)
+ *
+ * Return: index of @f and @side in the flow table
+ */
+#define FLOW_SIDX(f_, side) (flow_sidx(&(f_)->f, (side)))
+
#endif /* FLOW_TABLE_H */
--
@@ -47,4 +47,40 @@ static inline unsigned flow_idx(const struct flow_common *f)
*/
#define FLOW(idx) (&flowtab[(idx)])
+/** flow_at_sidx - Flow entry for a given sidx
+ * @sidx: Flow & side index
+ *
+ * Return: pointer to the corresponding flow entry, or NULL
+ */
+static inline union flow *flow_at_sidx(flow_sidx_t sidx)
+{
+ if (sidx.flow >= FLOW_MAX)
+ return NULL;
+ return FLOW(sidx.flow);
+}
+
+/** flow_sidx_t - Index of one side of a flow from common structure
+ * @f: Common flow fields pointer
+ * @side: Which side to refer to (0 or 1)
+ *
+ * Return: index of @f and @side in the flow table
+ */
+static inline flow_sidx_t flow_sidx(const struct flow_common *f,
+ int side)
+{
+ ASSERT(side == !!side);
+ return (flow_sidx_t){
+ .side = side,
+ .flow = flow_idx(f),
+ };
+}
+
+/** FLOW_SIDX - Find the index of one side of a flow
+ * @f_: Flow pointer, either union flow * or protocol specific
+ * @side: Which side to index (0 or 1)
+ *
+ * Return: index of @f and @side in the flow table
+ */
+#define FLOW_SIDX(f_, side) (flow_sidx(&(f_)->f, (side)))
+
#endif /* FLOW_TABLE_H */
--
2.43.0
^ permalink raw reply related [flat|nested] 18+ messages in thread
* Re: [PATCH v2 07/11] flow: Introduce 'sidx' type to represent one side of one flow
2023-11-26 23:33 ` [PATCH v2 07/11] flow: Introduce 'sidx' type to represent one side of one flow David Gibson
@ 2023-11-29 14:32 ` Stefano Brivio
2023-11-30 0:37 ` David Gibson
0 siblings, 1 reply; 18+ messages in thread
From: Stefano Brivio @ 2023-11-29 14:32 UTC (permalink / raw)
To: David Gibson; +Cc: passt-dev
On Mon, 27 Nov 2023 10:33:44 +1100
David Gibson <david@gibson.dropbear.id.au> wrote:
> In a number of places, we use indices into the flow table to identify a
> specific flow. We also have cases where we need to identify a particular
> side of a particular flow, and we expect those to become more common as
> we generalise the flow table to cover more things.
>
> To assist with that, introduces flow_sidx_t, an index type which identifies
> a specific side of a specific flow in the table.
>
> Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
> ---
> flow.h | 13 +++++++++++++
> flow_table.h | 36 ++++++++++++++++++++++++++++++++++++
> 2 files changed, 49 insertions(+)
>
> diff --git a/flow.h b/flow.h
> index b6da516..3c90bbd 100644
> --- a/flow.h
> +++ b/flow.h
> @@ -39,6 +39,19 @@ struct flow_common {
> #define FLOW_TABLE_PRESSURE 30 /* % of FLOW_MAX */
> #define FLOW_FILE_PRESSURE 30 /* % of c->nofile */
>
> +/**
> + * struct flow_sidx - ID for one side of a specific flow
> + * @side: Side referenced (0 or 1)
> + * @flow: Index of flow referenced
> + */
> +typedef struct flow_sidx {
Implying my usual argument :) ...is there any advantage over using this
simply as a struct?
> + int side :1;
> + unsigned flow :FLOW_INDEX_BITS;
> +} flow_sidx_t;
--
Stefano
^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: [PATCH v2 07/11] flow: Introduce 'sidx' type to represent one side of one flow
2023-11-29 14:32 ` Stefano Brivio
@ 2023-11-30 0:37 ` David Gibson
2023-11-30 9:21 ` Stefano Brivio
0 siblings, 1 reply; 18+ messages in thread
From: David Gibson @ 2023-11-30 0:37 UTC (permalink / raw)
To: Stefano Brivio; +Cc: passt-dev
[-- Attachment #1: Type: text/plain, Size: 2140 bytes --]
On Wed, Nov 29, 2023 at 03:32:32PM +0100, Stefano Brivio wrote:
> On Mon, 27 Nov 2023 10:33:44 +1100
> David Gibson <david@gibson.dropbear.id.au> wrote:
>
> > In a number of places, we use indices into the flow table to identify a
> > specific flow. We also have cases where we need to identify a particular
> > side of a particular flow, and we expect those to become more common as
> > we generalise the flow table to cover more things.
> >
> > To assist with that, introduces flow_sidx_t, an index type which identifies
> > a specific side of a specific flow in the table.
> >
> > Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
> > ---
> > flow.h | 13 +++++++++++++
> > flow_table.h | 36 ++++++++++++++++++++++++++++++++++++
> > 2 files changed, 49 insertions(+)
> >
> > diff --git a/flow.h b/flow.h
> > index b6da516..3c90bbd 100644
> > --- a/flow.h
> > +++ b/flow.h
> > @@ -39,6 +39,19 @@ struct flow_common {
> > #define FLOW_TABLE_PRESSURE 30 /* % of FLOW_MAX */
> > #define FLOW_FILE_PRESSURE 30 /* % of c->nofile */
> >
> > +/**
> > + * struct flow_sidx - ID for one side of a specific flow
> > + * @side: Side referenced (0 or 1)
> > + * @flow: Index of flow referenced
> > + */
> > +typedef struct flow_sidx {
>
> Implying my usual argument :) ...is there any advantage over using this
> simply as a struct?
So, usually I too would prefer to use a struct as a struct, without a
typedef. The reason I'm doing differently here, is that I want to
emphasise that for many purposes this can be treated like an index, in
particular that it's small and trivially copyable. In particular it
should be passed by value, passing by reference would be silly.
That's kind of the opposite of what one tends to be conveying by
reminding users that they're working with a struct.
>
> > + int side :1;
> > + unsigned flow :FLOW_INDEX_BITS;
> > +} flow_sidx_t;
>
--
David Gibson | I'll have my music baroque, and my code
david AT gibson.dropbear.id.au | minimalist, thank you. NOT _the_ _other_
| _way_ _around_!
http://www.ozlabs.org/~dgibson
[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]
^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: [PATCH v2 07/11] flow: Introduce 'sidx' type to represent one side of one flow
2023-11-30 0:37 ` David Gibson
@ 2023-11-30 9:21 ` Stefano Brivio
2023-12-01 0:10 ` David Gibson
0 siblings, 1 reply; 18+ messages in thread
From: Stefano Brivio @ 2023-11-30 9:21 UTC (permalink / raw)
To: David Gibson; +Cc: passt-dev
On Thu, 30 Nov 2023 11:37:40 +1100
David Gibson <david@gibson.dropbear.id.au> wrote:
> On Wed, Nov 29, 2023 at 03:32:32PM +0100, Stefano Brivio wrote:
> > On Mon, 27 Nov 2023 10:33:44 +1100
> > David Gibson <david@gibson.dropbear.id.au> wrote:
> >
> > > In a number of places, we use indices into the flow table to identify a
> > > specific flow. We also have cases where we need to identify a particular
> > > side of a particular flow, and we expect those to become more common as
> > > we generalise the flow table to cover more things.
> > >
> > > To assist with that, introduces flow_sidx_t, an index type which identifies
> > > a specific side of a specific flow in the table.
> > >
> > > Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
> > > ---
> > > flow.h | 13 +++++++++++++
> > > flow_table.h | 36 ++++++++++++++++++++++++++++++++++++
> > > 2 files changed, 49 insertions(+)
> > >
> > > diff --git a/flow.h b/flow.h
> > > index b6da516..3c90bbd 100644
> > > --- a/flow.h
> > > +++ b/flow.h
> > > @@ -39,6 +39,19 @@ struct flow_common {
> > > #define FLOW_TABLE_PRESSURE 30 /* % of FLOW_MAX */
> > > #define FLOW_FILE_PRESSURE 30 /* % of c->nofile */
> > >
> > > +/**
> > > + * struct flow_sidx - ID for one side of a specific flow
> > > + * @side: Side referenced (0 or 1)
> > > + * @flow: Index of flow referenced
> > > + */
> > > +typedef struct flow_sidx {
> >
> > Implying my usual argument :) ...is there any advantage over using this
> > simply as a struct?
>
> So, usually I too would prefer to use a struct as a struct, without a
> typedef. The reason I'm doing differently here, is that I want to
> emphasise that for many purposes this can be treated like an index, in
> particular that it's small and trivially copyable. In particular it
> should be passed by value, passing by reference would be silly.
Hmm, that was exactly my "not hiding" point though. The day somebody
adds here:
char mood[RLIMIT_STACK_VAL + 1]; /* list of side emojis */
the typedef makes it still apparently okay to pass by value. If it's a
struct, one surely has to check first.
> That's kind of the opposite of what one tends to be conveying by
> reminding users that they're working with a struct.
I see, but it's probably a matter of taste (passing structs by value
doesn't personally make me nervous).
--
Stefano
^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: [PATCH v2 07/11] flow: Introduce 'sidx' type to represent one side of one flow
2023-11-30 9:21 ` Stefano Brivio
@ 2023-12-01 0:10 ` David Gibson
0 siblings, 0 replies; 18+ messages in thread
From: David Gibson @ 2023-12-01 0:10 UTC (permalink / raw)
To: Stefano Brivio; +Cc: passt-dev
[-- Attachment #1: Type: text/plain, Size: 3028 bytes --]
On Thu, Nov 30, 2023 at 10:21:16AM +0100, Stefano Brivio wrote:
> On Thu, 30 Nov 2023 11:37:40 +1100
> David Gibson <david@gibson.dropbear.id.au> wrote:
>
> > On Wed, Nov 29, 2023 at 03:32:32PM +0100, Stefano Brivio wrote:
> > > On Mon, 27 Nov 2023 10:33:44 +1100
> > > David Gibson <david@gibson.dropbear.id.au> wrote:
> > >
> > > > In a number of places, we use indices into the flow table to identify a
> > > > specific flow. We also have cases where we need to identify a particular
> > > > side of a particular flow, and we expect those to become more common as
> > > > we generalise the flow table to cover more things.
> > > >
> > > > To assist with that, introduces flow_sidx_t, an index type which identifies
> > > > a specific side of a specific flow in the table.
> > > >
> > > > Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
> > > > ---
> > > > flow.h | 13 +++++++++++++
> > > > flow_table.h | 36 ++++++++++++++++++++++++++++++++++++
> > > > 2 files changed, 49 insertions(+)
> > > >
> > > > diff --git a/flow.h b/flow.h
> > > > index b6da516..3c90bbd 100644
> > > > --- a/flow.h
> > > > +++ b/flow.h
> > > > @@ -39,6 +39,19 @@ struct flow_common {
> > > > #define FLOW_TABLE_PRESSURE 30 /* % of FLOW_MAX */
> > > > #define FLOW_FILE_PRESSURE 30 /* % of c->nofile */
> > > >
> > > > +/**
> > > > + * struct flow_sidx - ID for one side of a specific flow
> > > > + * @side: Side referenced (0 or 1)
> > > > + * @flow: Index of flow referenced
> > > > + */
> > > > +typedef struct flow_sidx {
> > >
> > > Implying my usual argument :) ...is there any advantage over using this
> > > simply as a struct?
> >
> > So, usually I too would prefer to use a struct as a struct, without a
> > typedef. The reason I'm doing differently here, is that I want to
> > emphasise that for many purposes this can be treated like an index, in
> > particular that it's small and trivially copyable. In particular it
> > should be passed by value, passing by reference would be silly.
>
> Hmm, that was exactly my "not hiding" point though. The day somebody
> adds here:
>
> char mood[RLIMIT_STACK_VAL + 1]; /* list of side emojis */
>
> the typedef makes it still apparently okay to pass by value. If it's a
> struct, one surely has to check first.
Right.. and that's kind of the point. If someone adds that, this
struct is no longer doing the job intended for it, and a wider
redesign is needed. That's why there's also the static_assert()
verifying that flow_sidx_t fits in a u32.
> > That's kind of the opposite of what one tends to be conveying by
> > reminding users that they're working with a struct.
>
> I see, but it's probably a matter of taste (passing structs by value
> doesn't personally make me nervous).
>
--
David Gibson | I'll have my music baroque, and my code
david AT gibson.dropbear.id.au | minimalist, thank you. NOT _the_ _other_
| _way_ _around_!
http://www.ozlabs.org/~dgibson
[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]
^ permalink raw reply [flat|nested] 18+ messages in thread
* [PATCH v2 08/11] tcp: Remove unneccessary bounds check in tcp_timer_handler()
2023-11-26 23:33 [PATCH v2 00/11] Introduce unified flow table, first steps David Gibson
` (6 preceding siblings ...)
2023-11-26 23:33 ` [PATCH v2 07/11] flow: Introduce 'sidx' type to represent one side of one flow David Gibson
@ 2023-11-26 23:33 ` David Gibson
2023-11-29 14:32 ` Stefano Brivio
2023-11-26 23:33 ` [PATCH v2 09/11] flow,tcp: Generalise TCP epoll_ref to generic flows David Gibson
` (2 subsequent siblings)
10 siblings, 1 reply; 18+ messages in thread
From: David Gibson @ 2023-11-26 23:33 UTC (permalink / raw)
To: Stefano Brivio, passt-dev; +Cc: David Gibson
In tcp_timer_handler() we use conn_at_idx() to interpret the flow index
from the epoll reference. However, this will never be NULL - we always
put a valid index into the epoll_ref. Simplify slightly based on this.
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
---
tcp.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/tcp.c b/tcp.c
index f427047..512baf3 100644
--- a/tcp.c
+++ b/tcp.c
@@ -2759,10 +2759,10 @@ void tcp_listen_handler(struct ctx *c, union epoll_ref ref,
*/
void tcp_timer_handler(struct ctx *c, union epoll_ref ref)
{
- struct tcp_tap_conn *conn = conn_at_idx(ref.tcp.index);
struct itimerspec check_armed = { { 0 }, { 0 } };
+ struct tcp_tap_conn *conn = CONN(ref.tcp.index);
- if (c->no_tcp || !conn)
+ if (c->no_tcp)
return;
/* We don't reset timers on ~ACK_FROM_TAP_DUE, ~ACK_TO_TAP_DUE. If the
--
@@ -2759,10 +2759,10 @@ void tcp_listen_handler(struct ctx *c, union epoll_ref ref,
*/
void tcp_timer_handler(struct ctx *c, union epoll_ref ref)
{
- struct tcp_tap_conn *conn = conn_at_idx(ref.tcp.index);
struct itimerspec check_armed = { { 0 }, { 0 } };
+ struct tcp_tap_conn *conn = CONN(ref.tcp.index);
- if (c->no_tcp || !conn)
+ if (c->no_tcp)
return;
/* We don't reset timers on ~ACK_FROM_TAP_DUE, ~ACK_TO_TAP_DUE. If the
--
2.43.0
^ permalink raw reply related [flat|nested] 18+ messages in thread
* Re: [PATCH v2 08/11] tcp: Remove unneccessary bounds check in tcp_timer_handler()
2023-11-26 23:33 ` [PATCH v2 08/11] tcp: Remove unneccessary bounds check in tcp_timer_handler() David Gibson
@ 2023-11-29 14:32 ` Stefano Brivio
2023-11-30 0:42 ` David Gibson
0 siblings, 1 reply; 18+ messages in thread
From: Stefano Brivio @ 2023-11-29 14:32 UTC (permalink / raw)
To: David Gibson; +Cc: passt-dev
On Mon, 27 Nov 2023 10:33:45 +1100
David Gibson <david@gibson.dropbear.id.au> wrote:
> In tcp_timer_handler() we use conn_at_idx() to interpret the flow index
> from the epoll reference. However, this will never be NULL - we always
> put a valid index into the epoll_ref. Simplify slightly based on this.
Sorry, I missed this during review of v1.
I have mixed feeling about this, and I don't think 11/11 changes
anything in this regard: we have to trust the kernel, as there's no
benefit to security in not doing so.
At the same time, should we ever get an out-of-bounds index from the
epoll event, we can fail gracefully here. Slightly worse, however: if
we get a timer event for a connection that's already closed, we'll
happily go and try to manipulate its timer (with or without the !conn
check).
All in all, I think the check is minimally useful, and we should have
something more robust than that. So if this patch helps keeping things
simple later in the series, I don't see an issue with that, but perhaps
we should add back a more sensible set of checks later.
The next patches all look good to me.
--
Stefano
^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: [PATCH v2 08/11] tcp: Remove unneccessary bounds check in tcp_timer_handler()
2023-11-29 14:32 ` Stefano Brivio
@ 2023-11-30 0:42 ` David Gibson
0 siblings, 0 replies; 18+ messages in thread
From: David Gibson @ 2023-11-30 0:42 UTC (permalink / raw)
To: Stefano Brivio; +Cc: passt-dev
[-- Attachment #1: Type: text/plain, Size: 2036 bytes --]
On Wed, Nov 29, 2023 at 03:32:39PM +0100, Stefano Brivio wrote:
> On Mon, 27 Nov 2023 10:33:45 +1100
> David Gibson <david@gibson.dropbear.id.au> wrote:
>
> > In tcp_timer_handler() we use conn_at_idx() to interpret the flow index
> > from the epoll reference. However, this will never be NULL - we always
> > put a valid index into the epoll_ref. Simplify slightly based on this.
>
> Sorry, I missed this during review of v1.
>
> I have mixed feeling about this, and I don't think 11/11 changes
> anything in this regard: we have to trust the kernel, as there's no
> benefit to security in not doing so.
>
> At the same time, should we ever get an out-of-bounds index from the
> epoll event, we can fail gracefully here. Slightly worse, however: if
> we get a timer event for a connection that's already closed, we'll
> happily go and try to manipulate its timer (with or without the !conn
> check).
So, as you note this only verifies that the index is theoretically
possible. It doesn't check that it's a valid index in the current
size of the connection table, it doesn't check if the connection is
already closed and it can't check if it's a stale index for a
different connection than the one originally intended, because the
table has been compacted.
Given all those potential failure modes, I don't see a lot of value in
checking if the index is wildly out of bounds, which would require a
kernel bug rather more extreme than those other possibilies.
> All in all, I think the check is minimally useful, and we should have
> something more robust than that. So if this patch helps keeping things
> simple later in the series, I don't see an issue with that, but perhaps
> we should add back a more sensible set of checks later.
Perhaps, yes.
> The next patches all look good to me.
--
David Gibson | I'll have my music baroque, and my code
david AT gibson.dropbear.id.au | minimalist, thank you. NOT _the_ _other_
| _way_ _around_!
http://www.ozlabs.org/~dgibson
[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]
^ permalink raw reply [flat|nested] 18+ messages in thread
* [PATCH v2 09/11] flow,tcp: Generalise TCP epoll_ref to generic flows
2023-11-26 23:33 [PATCH v2 00/11] Introduce unified flow table, first steps David Gibson
` (7 preceding siblings ...)
2023-11-26 23:33 ` [PATCH v2 08/11] tcp: Remove unneccessary bounds check in tcp_timer_handler() David Gibson
@ 2023-11-26 23:33 ` David Gibson
2023-11-26 23:33 ` [PATCH v2 10/11] tcp_splice: Use unsigned to represent side David Gibson
2023-11-26 23:33 ` [PATCH v2 11/11] flow,tcp: Use epoll_ref type including flow and side David Gibson
10 siblings, 0 replies; 18+ messages in thread
From: David Gibson @ 2023-11-26 23:33 UTC (permalink / raw)
To: Stefano Brivio, passt-dev; +Cc: David Gibson
TCP uses three different epoll object types: one for connected sockets, one
for timers and one for listening sockets. Listening sockets really need
information that's specific to TCP, so need their own epoll_ref field.
Timers and connected sockets, however, only need the connection (flow)
they're associated with. As we expand the use of the flow table, we expect
that to be true for more epoll fds. So, rename the "TCP" epoll_ref field
to be a "flow" epoll_ref field that can be used both for TCP and for other
future cases.
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
---
passt.h | 6 +++---
tcp.c | 10 +++++-----
tcp_splice.c | 4 ++--
3 files changed, 10 insertions(+), 10 deletions(-)
diff --git a/passt.h b/passt.h
index 0fce637..66a819f 100644
--- a/passt.h
+++ b/passt.h
@@ -76,8 +76,8 @@ enum epoll_type {
* union epoll_ref - Breakdown of reference for epoll fd bookkeeping
* @type: Type of fd (tells us what to do with events)
* @fd: File descriptor number (implies < 2^24 total descriptors)
- * @tcp: TCP-specific reference part (connected sockets)
- * @tcp_listen: TCP-specific reference part (listening sockets)
+ * @flow: Index of the flow this fd is linked to
+ * @tcp_listen: TCP-specific reference part for listening sockets
* @udp: UDP-specific reference part
* @icmp: ICMP-specific reference part
* @data: Data handled by protocol handlers
@@ -90,7 +90,7 @@ union epoll_ref {
#define FD_REF_MAX ((int)MAX_FROM_BITS(FD_REF_BITS))
int32_t fd:FD_REF_BITS;
union {
- union tcp_epoll_ref tcp;
+ uint32_t flow;
union tcp_listen_epoll_ref tcp_listen;
union udp_epoll_ref udp;
union icmp_epoll_ref icmp;
diff --git a/tcp.c b/tcp.c
index 512baf3..e80a6aa 100644
--- a/tcp.c
+++ b/tcp.c
@@ -639,7 +639,7 @@ static int tcp_epoll_ctl(const struct ctx *c, struct tcp_tap_conn *conn)
{
int m = conn->in_epoll ? EPOLL_CTL_MOD : EPOLL_CTL_ADD;
union epoll_ref ref = { .type = EPOLL_TYPE_TCP, .fd = conn->sock,
- .tcp.index = FLOW_IDX(conn) };
+ .flow = FLOW_IDX(conn) };
struct epoll_event ev = { .data.u64 = ref.u64 };
if (conn->events == CLOSED) {
@@ -660,7 +660,7 @@ static int tcp_epoll_ctl(const struct ctx *c, struct tcp_tap_conn *conn)
if (conn->timer != -1) {
union epoll_ref ref_t = { .type = EPOLL_TYPE_TCP_TIMER,
.fd = conn->sock,
- .tcp.index = FLOW_IDX(conn) };
+ .flow = FLOW_IDX(conn) };
struct epoll_event ev_t = { .data.u64 = ref_t.u64,
.events = EPOLLIN | EPOLLET };
@@ -688,7 +688,7 @@ static void tcp_timer_ctl(const struct ctx *c, struct tcp_tap_conn *conn)
if (conn->timer == -1) {
union epoll_ref ref = { .type = EPOLL_TYPE_TCP_TIMER,
.fd = conn->sock,
- .tcp.index = FLOW_IDX(conn) };
+ .flow = FLOW_IDX(conn) };
struct epoll_event ev = { .data.u64 = ref.u64,
.events = EPOLLIN | EPOLLET };
int fd;
@@ -2760,7 +2760,7 @@ void tcp_listen_handler(struct ctx *c, union epoll_ref ref,
void tcp_timer_handler(struct ctx *c, union epoll_ref ref)
{
struct itimerspec check_armed = { { 0 }, { 0 } };
- struct tcp_tap_conn *conn = CONN(ref.tcp.index);
+ struct tcp_tap_conn *conn = CONN(ref.flow);
if (c->no_tcp)
return;
@@ -2874,7 +2874,7 @@ static void tcp_tap_sock_handler(struct ctx *c, struct tcp_tap_conn *conn,
*/
void tcp_sock_handler(struct ctx *c, union epoll_ref ref, uint32_t events)
{
- union flow *flow = flowtab + ref.tcp.index;
+ union flow *flow = FLOW(ref.flow);
switch (flow->f.type) {
case FLOW_TCP:
diff --git a/tcp_splice.c b/tcp_splice.c
index 28b91e1..5ebc4e5 100644
--- a/tcp_splice.c
+++ b/tcp_splice.c
@@ -128,8 +128,8 @@ static int tcp_splice_epoll_ctl(const struct ctx *c,
{
int m = conn->in_epoll ? EPOLL_CTL_MOD : EPOLL_CTL_ADD;
union epoll_ref ref[SIDES] = {
- { .type = EPOLL_TYPE_TCP, .fd = conn->s[0], .tcp.index = FLOW_IDX(conn) },
- { .type = EPOLL_TYPE_TCP, .fd = conn->s[1], .tcp.index = FLOW_IDX(conn) }
+ { .type = EPOLL_TYPE_TCP, .fd = conn->s[0], .flow = FLOW_IDX(conn) },
+ { .type = EPOLL_TYPE_TCP, .fd = conn->s[1], .flow = FLOW_IDX(conn) }
};
struct epoll_event ev[SIDES] = { { .data.u64 = ref[0].u64 },
{ .data.u64 = ref[1].u64 } };
--
@@ -128,8 +128,8 @@ static int tcp_splice_epoll_ctl(const struct ctx *c,
{
int m = conn->in_epoll ? EPOLL_CTL_MOD : EPOLL_CTL_ADD;
union epoll_ref ref[SIDES] = {
- { .type = EPOLL_TYPE_TCP, .fd = conn->s[0], .tcp.index = FLOW_IDX(conn) },
- { .type = EPOLL_TYPE_TCP, .fd = conn->s[1], .tcp.index = FLOW_IDX(conn) }
+ { .type = EPOLL_TYPE_TCP, .fd = conn->s[0], .flow = FLOW_IDX(conn) },
+ { .type = EPOLL_TYPE_TCP, .fd = conn->s[1], .flow = FLOW_IDX(conn) }
};
struct epoll_event ev[SIDES] = { { .data.u64 = ref[0].u64 },
{ .data.u64 = ref[1].u64 } };
--
2.43.0
^ permalink raw reply related [flat|nested] 18+ messages in thread
* [PATCH v2 10/11] tcp_splice: Use unsigned to represent side
2023-11-26 23:33 [PATCH v2 00/11] Introduce unified flow table, first steps David Gibson
` (8 preceding siblings ...)
2023-11-26 23:33 ` [PATCH v2 09/11] flow,tcp: Generalise TCP epoll_ref to generic flows David Gibson
@ 2023-11-26 23:33 ` David Gibson
2023-11-26 23:33 ` [PATCH v2 11/11] flow,tcp: Use epoll_ref type including flow and side David Gibson
10 siblings, 0 replies; 18+ messages in thread
From: David Gibson @ 2023-11-26 23:33 UTC (permalink / raw)
To: Stefano Brivio, passt-dev; +Cc: David Gibson
Currently, we use 'int' values to represent the "side" of a connection,
which must always be 0 or 1. This turns out to be dangerous.
In some cases we're going to want to put the side into a 1-bit bitfield.
However, if that bitfield has type 'int', when we copy it out to a regular
'int' variable, it will be sign-extended and so have values 0 and -1,
instead of 0 and 1.
To avoid this, always use unsigned variables for the side.
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
---
tcp_splice.c | 7 ++++---
1 file changed, 4 insertions(+), 3 deletions(-)
diff --git a/tcp_splice.c b/tcp_splice.c
index 5ebc4e5..9cec9c6 100644
--- a/tcp_splice.c
+++ b/tcp_splice.c
@@ -249,7 +249,7 @@ void tcp_splice_conn_update(const struct ctx *c, struct tcp_splice_conn *new)
void tcp_splice_destroy(struct ctx *c, union flow *flow)
{
struct tcp_splice_conn *conn = &flow->tcp_splice;
- int side;
+ unsigned side;
for (side = 0; side < SIDES; side++) {
if (conn->events & SPLICE_ESTABLISHED) {
@@ -286,8 +286,8 @@ void tcp_splice_destroy(struct ctx *c, union flow *flow)
static int tcp_splice_connect_finish(const struct ctx *c,
struct tcp_splice_conn *conn)
{
+ unsigned side;
int i = 0;
- int side;
for (side = 0; side < SIDES; side++) {
conn->pipe[side][0] = conn->pipe[side][1] = -1;
@@ -490,7 +490,8 @@ void tcp_splice_sock_handler(struct ctx *c, struct tcp_splice_conn *conn,
int s, uint32_t events)
{
uint8_t lowat_set_flag, lowat_act_flag;
- int fromside, eof, never_read;
+ int eof, never_read;
+ unsigned fromside;
if (conn->events == SPLICE_CLOSED)
return;
--
@@ -249,7 +249,7 @@ void tcp_splice_conn_update(const struct ctx *c, struct tcp_splice_conn *new)
void tcp_splice_destroy(struct ctx *c, union flow *flow)
{
struct tcp_splice_conn *conn = &flow->tcp_splice;
- int side;
+ unsigned side;
for (side = 0; side < SIDES; side++) {
if (conn->events & SPLICE_ESTABLISHED) {
@@ -286,8 +286,8 @@ void tcp_splice_destroy(struct ctx *c, union flow *flow)
static int tcp_splice_connect_finish(const struct ctx *c,
struct tcp_splice_conn *conn)
{
+ unsigned side;
int i = 0;
- int side;
for (side = 0; side < SIDES; side++) {
conn->pipe[side][0] = conn->pipe[side][1] = -1;
@@ -490,7 +490,8 @@ void tcp_splice_sock_handler(struct ctx *c, struct tcp_splice_conn *conn,
int s, uint32_t events)
{
uint8_t lowat_set_flag, lowat_act_flag;
- int fromside, eof, never_read;
+ int eof, never_read;
+ unsigned fromside;
if (conn->events == SPLICE_CLOSED)
return;
--
2.43.0
^ permalink raw reply related [flat|nested] 18+ messages in thread
* [PATCH v2 11/11] flow,tcp: Use epoll_ref type including flow and side
2023-11-26 23:33 [PATCH v2 00/11] Introduce unified flow table, first steps David Gibson
` (9 preceding siblings ...)
2023-11-26 23:33 ` [PATCH v2 10/11] tcp_splice: Use unsigned to represent side David Gibson
@ 2023-11-26 23:33 ` David Gibson
10 siblings, 0 replies; 18+ messages in thread
From: David Gibson @ 2023-11-26 23:33 UTC (permalink / raw)
To: Stefano Brivio, passt-dev; +Cc: David Gibson
Currently TCP uses the 'flow' epoll_ref field for both connected
sockets and timers, which consists of just the index of the relevant
flow (connection).
This is just fine for timers, for while it obviously works, it's
subtly incomplete for sockets on spliced connections. In that case we
want to know which side of the connection the event is occurring on as
well as which connection. At present, we deduce that information by
looking at the actual fd, and comparing it to the fds of the sockets
on each side.
When we use the flow table for more things, we expect more cases where
something will need to know a specific side of a specific flow for an
event, but nothing more.
Therefore add a new 'flowside' epoll_ref field, with exactly that
information. We use it for TCP connected sockets. This allows us to
directly know the side for spliced connections. For "tap"
connections, it's pretty meaningless, since the side is always the
socket side. It still makes logical sense though, and it may become
important for future flow table work.
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
---
flow.h | 2 +-
passt.h | 2 ++
tcp.c | 11 ++++++++---
tcp_splice.c | 37 ++++++++++++-------------------------
tcp_splice.h | 2 +-
5 files changed, 24 insertions(+), 30 deletions(-)
diff --git a/flow.h b/flow.h
index 3c90bbd..75e2115 100644
--- a/flow.h
+++ b/flow.h
@@ -45,7 +45,7 @@ struct flow_common {
* @flow: Index of flow referenced
*/
typedef struct flow_sidx {
- int side :1;
+ unsigned side :1;
unsigned flow :FLOW_INDEX_BITS;
} flow_sidx_t;
static_assert(sizeof(flow_sidx_t) <= sizeof(uint32_t));
diff --git a/passt.h b/passt.h
index 66a819f..33b493f 100644
--- a/passt.h
+++ b/passt.h
@@ -37,6 +37,7 @@ union epoll_ref;
#include "pif.h"
#include "packet.h"
+#include "flow.h"
#include "icmp.h"
#include "port_fwd.h"
#include "tcp.h"
@@ -91,6 +92,7 @@ union epoll_ref {
int32_t fd:FD_REF_BITS;
union {
uint32_t flow;
+ flow_sidx_t flowside;
union tcp_listen_epoll_ref tcp_listen;
union udp_epoll_ref udp;
union icmp_epoll_ref icmp;
diff --git a/tcp.c b/tcp.c
index e80a6aa..24aba44 100644
--- a/tcp.c
+++ b/tcp.c
@@ -304,6 +304,10 @@
#include "tcp_conn.h"
#include "flow_table.h"
+/* Sides of a flow as we use them in "tap" connections */
+#define SOCKSIDE 0
+#define TAPSIDE 1
+
#define TCP_FRAMES_MEM 128
#define TCP_FRAMES \
(c->mode == MODE_PASST ? TCP_FRAMES_MEM : 1)
@@ -639,7 +643,7 @@ static int tcp_epoll_ctl(const struct ctx *c, struct tcp_tap_conn *conn)
{
int m = conn->in_epoll ? EPOLL_CTL_MOD : EPOLL_CTL_ADD;
union epoll_ref ref = { .type = EPOLL_TYPE_TCP, .fd = conn->sock,
- .flow = FLOW_IDX(conn) };
+ .flowside = FLOW_SIDX(conn, SOCKSIDE) };
struct epoll_event ev = { .data.u64 = ref.u64 };
if (conn->events == CLOSED) {
@@ -2874,14 +2878,15 @@ static void tcp_tap_sock_handler(struct ctx *c, struct tcp_tap_conn *conn,
*/
void tcp_sock_handler(struct ctx *c, union epoll_ref ref, uint32_t events)
{
- union flow *flow = FLOW(ref.flow);
+ union flow *flow = FLOW(ref.flowside.flow);
switch (flow->f.type) {
case FLOW_TCP:
tcp_tap_sock_handler(c, &flow->tcp, events);
break;
case FLOW_TCP_SPLICE:
- tcp_splice_sock_handler(c, &flow->tcp_splice, ref.fd, events);
+ tcp_splice_sock_handler(c, &flow->tcp_splice, ref.flowside.side,
+ events);
break;
default:
die("Unexpected %s in tcp_sock_handler_compact()",
diff --git a/tcp_splice.c b/tcp_splice.c
index 9cec9c6..d5c351b 100644
--- a/tcp_splice.c
+++ b/tcp_splice.c
@@ -128,8 +128,10 @@ static int tcp_splice_epoll_ctl(const struct ctx *c,
{
int m = conn->in_epoll ? EPOLL_CTL_MOD : EPOLL_CTL_ADD;
union epoll_ref ref[SIDES] = {
- { .type = EPOLL_TYPE_TCP, .fd = conn->s[0], .flow = FLOW_IDX(conn) },
- { .type = EPOLL_TYPE_TCP, .fd = conn->s[1], .flow = FLOW_IDX(conn) }
+ { .type = EPOLL_TYPE_TCP, .fd = conn->s[0],
+ .flowside = FLOW_SIDX(conn, 0) },
+ { .type = EPOLL_TYPE_TCP, .fd = conn->s[1],
+ .flowside = FLOW_SIDX(conn, 1) }
};
struct epoll_event ev[SIDES] = { { .data.u64 = ref[0].u64 },
{ .data.u64 = ref[1].u64 } };
@@ -481,13 +483,13 @@ bool tcp_splice_conn_from_sock(const struct ctx *c,
* tcp_splice_sock_handler() - Handler for socket mapped to spliced connection
* @c: Execution context
* @conn: Connection state
- * @s: Socket fd on which an event has occurred
+ * @side: Side of the connection on which an event has occurred
* @events: epoll events bitmap
*
* #syscalls:pasta splice
*/
void tcp_splice_sock_handler(struct ctx *c, struct tcp_splice_conn *conn,
- int s, uint32_t events)
+ int side, uint32_t events)
{
uint8_t lowat_set_flag, lowat_act_flag;
int eof, never_read;
@@ -507,30 +509,15 @@ void tcp_splice_sock_handler(struct ctx *c, struct tcp_splice_conn *conn,
}
if (events & EPOLLOUT) {
- if (s == conn->s[0]) {
- conn_event(c, conn, ~OUT_WAIT_0);
- fromside = 1;
- } else {
- conn_event(c, conn, ~OUT_WAIT_1);
- fromside = 0;
- }
+ fromside = !side;
+ conn_event(c, conn, side == 0 ? ~OUT_WAIT_0 : ~OUT_WAIT_1);
} else {
- fromside = s == conn->s[0] ? 0 : 1;
- }
-
- if (events & EPOLLRDHUP) {
- if (s == conn->s[0])
- conn_event(c, conn, FIN_RCVD_0);
- else
- conn_event(c, conn, FIN_RCVD_1);
+ fromside = side;
}
- if (events & EPOLLHUP) {
- if (s == conn->s[0])
- conn_event(c, conn, FIN_SENT_0); /* Fake, but implied */
- else
- conn_event(c, conn, FIN_SENT_1);
- }
+ if (events & EPOLLRDHUP)
+ /* For side 0 this is fake, but implied */
+ conn_event(c, conn, side == 0 ? FIN_RCVD_0 : FIN_RCVD_1);
swap:
eof = 0;
diff --git a/tcp_splice.h b/tcp_splice.h
index dc486f1..aa85c7c 100644
--- a/tcp_splice.h
+++ b/tcp_splice.h
@@ -9,7 +9,7 @@
struct tcp_splice_conn;
void tcp_splice_sock_handler(struct ctx *c, struct tcp_splice_conn *conn,
- int s, uint32_t events);
+ int side, uint32_t events);
bool tcp_splice_conn_from_sock(const struct ctx *c,
union tcp_listen_epoll_ref ref,
struct tcp_splice_conn *conn, int s,
--
@@ -9,7 +9,7 @@
struct tcp_splice_conn;
void tcp_splice_sock_handler(struct ctx *c, struct tcp_splice_conn *conn,
- int s, uint32_t events);
+ int side, uint32_t events);
bool tcp_splice_conn_from_sock(const struct ctx *c,
union tcp_listen_epoll_ref ref,
struct tcp_splice_conn *conn, int s,
--
2.43.0
^ permalink raw reply related [flat|nested] 18+ messages in thread