public inbox for passt-dev@passt.top
 help / color / mirror / code / Atom feed
From: David Gibson <david@gibson.dropbear.id.au>
To: passt-dev@passt.top, Stefano Brivio <sbrivio@redhat.com>
Cc: David Gibson <david@gibson.dropbear.id.au>
Subject: [PATCH 07/32] tcp: Move connection state structures into a shared header
Date: Wed, 16 Nov 2022 15:41:47 +1100	[thread overview]
Message-ID: <20221116044212.3876516-8-david@gibson.dropbear.id.au> (raw)
In-Reply-To: <20221116044212.3876516-1-david@gibson.dropbear.id.au>

Currently spliced and non-spliced connections use completely independent
tracking structures.  We want to unify these, so as a preliminary step move
the definitions for both variants into a new tcp_conn.h header, shared by
tcp.c and tcp_splice.c.

This requires renaming some #defines with the same name but different
meanings between the two cases.  In the process we correct some places that
are slightly out of sync between the comments and the code for various
event bit names.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
---
 Makefile     |   3 +-
 tcp.c        | 206 +++++++++++++--------------------------------------
 tcp_conn.h   | 168 +++++++++++++++++++++++++++++++++++++++++
 tcp_splice.c |  93 +++++++----------------
 4 files changed, 245 insertions(+), 225 deletions(-)
 create mode 100644 tcp_conn.h

diff --git a/Makefile b/Makefile
index 8bcbbc0..9046b0b 100644
--- a/Makefile
+++ b/Makefile
@@ -45,7 +45,8 @@ MANPAGES = passt.1 pasta.1 qrap.1
 
 PASST_HEADERS = arch.h arp.h checksum.h conf.h dhcp.h dhcpv6.h icmp.h \
 	isolation.h lineread.h log.h ndp.h netlink.h packet.h passt.h pasta.h \
-	pcap.h port_fwd.h siphash.h tap.h tcp.h tcp_splice.h udp.h util.h
+	pcap.h port_fwd.h siphash.h tap.h tcp.h tcp_conn.h tcp_splice.h udp.h \
+	util.h
 HEADERS = $(PASST_HEADERS) seccomp.h
 
 # On gcc 11 and 12, with -O2 and -flto, tcp_hash() and siphash_20b(), if
diff --git a/tcp.c b/tcp.c
index 4e56a6c..1137f45 100644
--- a/tcp.c
+++ b/tcp.c
@@ -98,7 +98,7 @@
  * Connection tracking and storage
  * -------------------------------
  *
- * Connections are tracked by the @tc array of struct tcp_conn, containing
+ * Connections are tracked by the @tc array of struct tcp_tap_conn, containing
  * addresses, ports, TCP states and parameters. This is statically allocated and
  * indexed by an arbitrary connection number. The array is compacted whenever a
  * connection is closed, by remapping the highest connection index in use to the
@@ -301,6 +301,8 @@
 #include "tcp_splice.h"
 #include "log.h"
 
+#include "tcp_conn.h"
+
 #define TCP_FRAMES_MEM			128
 #define TCP_FRAMES							\
 	(c->mode == MODE_PASST ? TCP_FRAMES_MEM : 1)
@@ -308,7 +310,6 @@
 #define TCP_FILE_PRESSURE		30	/* % of c->nofile */
 #define TCP_CONN_PRESSURE		30	/* % of c->tcp.conn_count */
 
-#define TCP_HASH_BUCKET_BITS		(TCP_CONN_INDEX_BITS + 1)
 #define TCP_HASH_TABLE_LOAD		70		/* % */
 #define TCP_HASH_TABLE_SIZE		(TCP_MAX_CONNS * 100 /		\
 					 TCP_HASH_TABLE_LOAD)
@@ -402,117 +403,8 @@ struct tcp6_l2_head {	/* For MSS6 macro: keep in sync with tcp6_l2_buf_t */
 #define OPT_SACK	5
 #define OPT_TS		8
 
-/**
- * struct tcp_conn - Descriptor for a TCP connection (not spliced)
- * @next_index:		Connection index of next item in hash chain, -1 for none
- * @tap_mss:		MSS advertised by tap/guest, rounded to 2 ^ TCP_MSS_BITS
- * @sock:		Socket descriptor number
- * @events:		Connection events, implying connection states
- * @timer:		timerfd descriptor for timeout events
- * @flags:		Connection flags representing internal attributes
- * @hash_bucket:	Bucket index in connection lookup hash table
- * @retrans:		Number of retransmissions occurred due to ACK_TIMEOUT
- * @ws_from_tap:	Window scaling factor advertised from tap/guest
- * @ws_to_tap:		Window scaling factor advertised to tap/guest
- * @sndbuf:		Sending buffer in kernel, rounded to 2 ^ SNDBUF_BITS
- * @seq_dup_ack_approx:	Last duplicate ACK number sent to tap
- * @a.a6:		IPv6 remote address, can be IPv4-mapped
- * @a.a4.zero:		Zero prefix for IPv4-mapped, see RFC 6890, Table 20
- * @a.a4.one:		Ones prefix for IPv4-mapped
- * @a.a4.a:		IPv4 address
- * @tap_port:		Guest-facing tap port
- * @sock_port:		Remote, socket-facing port
- * @wnd_from_tap:	Last window size from tap, unscaled (as received)
- * @wnd_to_tap:		Sending window advertised to tap, unscaled (as sent)
- * @seq_to_tap:		Next sequence for packets to tap
- * @seq_ack_from_tap:	Last ACK number received from tap
- * @seq_from_tap:	Next sequence for packets from tap (not actually sent)
- * @seq_ack_to_tap:	Last ACK number sent to tap
- * @seq_init_from_tap:	Initial sequence number from tap
- */
-struct tcp_conn {
-	int	 	next_index	:TCP_CONN_INDEX_BITS + 2;
-
-#define TCP_RETRANS_BITS		3
-	unsigned int	retrans		:TCP_RETRANS_BITS;
-#define TCP_MAX_RETRANS			((1U << TCP_RETRANS_BITS) - 1)
-
-#define TCP_WS_BITS			4	/* RFC 7323 */
-#define TCP_WS_MAX			14
-	unsigned int	ws_from_tap	:TCP_WS_BITS;
-	unsigned int	ws_to_tap	:TCP_WS_BITS;
-
-
-	int		sock		:SOCKET_REF_BITS;
-
-	uint8_t		events;
-#define CLOSED			0
-#define SOCK_ACCEPTED		BIT(0)	/* implies SYN sent to tap */
-#define TAP_SYN_RCVD		BIT(1)	/* implies socket connecting */
-#define  TAP_SYN_ACK_SENT	BIT( 3)	/* implies socket connected */
-#define ESTABLISHED		BIT(2)
-#define  SOCK_FIN_RCVD		BIT( 3)
-#define  SOCK_FIN_SENT		BIT( 4)
-#define  TAP_FIN_RCVD		BIT( 5)
-#define  TAP_FIN_SENT		BIT( 6)
-#define  TAP_FIN_ACKED		BIT( 7)
-
-#define	CONN_STATE_BITS		/* Setting these clears other flags */	\
-	(SOCK_ACCEPTED | TAP_SYN_RCVD | ESTABLISHED)
-
-
-	int		timer		:SOCKET_REF_BITS;
-
-	uint8_t		flags;
-#define STALLED			BIT(0)
-#define LOCAL			BIT(1)
-#define WND_CLAMPED		BIT(2)
-#define IN_EPOLL		BIT(3)
-#define ACTIVE_CLOSE		BIT(4)
-#define ACK_TO_TAP_DUE		BIT(5)
-#define ACK_FROM_TAP_DUE	BIT(6)
-
-
-	unsigned int	hash_bucket	:TCP_HASH_BUCKET_BITS;
-
-#define TCP_MSS_BITS			14
-	unsigned int	tap_mss		:TCP_MSS_BITS;
-#define MSS_SET(conn, mss)	(conn->tap_mss = (mss >> (16 - TCP_MSS_BITS)))
-#define MSS_GET(conn)		(conn->tap_mss << (16 - TCP_MSS_BITS))
-
-
-#define SNDBUF_BITS		24
-	unsigned int	sndbuf		:SNDBUF_BITS;
-#define SNDBUF_SET(conn, bytes)	(conn->sndbuf = ((bytes) >> (32 - SNDBUF_BITS)))
-#define SNDBUF_GET(conn)	(conn->sndbuf << (32 - SNDBUF_BITS))
-
-	uint8_t		seq_dup_ack_approx;
-
-
-	union {
-		struct in6_addr a6;
-		struct {
-			uint8_t zero[10];
-			uint8_t one[2];
-			struct in_addr a;
-		} a4;
-	} a;
 #define CONN_V4(conn)		IN6_IS_ADDR_V4MAPPED(&conn->a.a6)
 #define CONN_V6(conn)		(!CONN_V4(conn))
-
-	in_port_t	tap_port;
-	in_port_t	sock_port;
-
-	uint16_t	wnd_from_tap;
-	uint16_t	wnd_to_tap;
-
-	uint32_t	seq_to_tap;
-	uint32_t	seq_ack_from_tap;
-	uint32_t	seq_from_tap;
-	uint32_t	seq_ack_to_tap;
-	uint32_t	seq_init_from_tap;
-};
-
 #define CONN_IS_CLOSING(conn)						\
 	((conn->events & ESTABLISHED) &&				\
 	 (conn->events & (SOCK_FIN_RCVD | TAP_FIN_RCVD)))
@@ -695,7 +587,7 @@ static unsigned int tcp6_l2_flags_buf_used;
 static size_t tcp6_l2_flags_buf_bytes;
 
 /* TCP connections */
-static struct tcp_conn tc[TCP_MAX_CONNS];
+static struct tcp_tap_conn tc[TCP_MAX_CONNS];
 
 #define CONN(index)		(tc + (index))
 #define CONN_IDX(conn)		((conn) - tc)
@@ -705,7 +597,7 @@ static struct tcp_conn tc[TCP_MAX_CONNS];
  *
  * Return:	Pointer to connection, or NULL if @index is out of bounds
  */
-static inline struct tcp_conn *conn_at_idx(int index)
+static inline struct tcp_tap_conn *conn_at_idx(int index)
 {
 	if ((index < 0) || (index >= TCP_MAX_CONNS))
 		return NULL;
@@ -713,7 +605,7 @@ static inline struct tcp_conn *conn_at_idx(int index)
 }
 
 /* Table for lookup from remote address, local port, remote port */
-static struct tcp_conn *tc_hash[TCP_HASH_TABLE_SIZE];
+static struct tcp_tap_conn *tc_hash[TCP_HASH_TABLE_SIZE];
 
 /* Pools for pre-opened sockets */
 int init_sock_pool4		[TCP_SOCK_POOL_SIZE];
@@ -749,7 +641,7 @@ static uint32_t tcp_conn_epoll_events(uint8_t events, uint8_t conn_flags)
 	return EPOLLRDHUP;
 }
 
-static void conn_flag_do(const struct ctx *c, struct tcp_conn *conn,
+static void conn_flag_do(const struct ctx *c, struct tcp_tap_conn *conn,
 			 unsigned long flag);
 #define conn_flag(c, conn, flag)					\
 	do {								\
@@ -764,7 +656,7 @@ static void conn_flag_do(const struct ctx *c, struct tcp_conn *conn,
  *
  * Return: 0 on success, negative error code on failure (not on deletion)
  */
-static int tcp_epoll_ctl(const struct ctx *c, struct tcp_conn *conn)
+static int tcp_epoll_ctl(const struct ctx *c, struct tcp_tap_conn *conn)
 {
 	int m = (conn->flags & IN_EPOLL) ? EPOLL_CTL_MOD : EPOLL_CTL_ADD;
 	union epoll_ref ref = { .r.proto = IPPROTO_TCP, .r.s = conn->sock,
@@ -809,7 +701,7 @@ static int tcp_epoll_ctl(const struct ctx *c, struct tcp_conn *conn)
  *
  * #syscalls timerfd_create timerfd_settime
  */
-static void tcp_timer_ctl(const struct ctx *c, struct tcp_conn *conn)
+static void tcp_timer_ctl(const struct ctx *c, struct tcp_tap_conn *conn)
 {
 	struct itimerspec it = { { 0 }, { 0 } };
 
@@ -865,7 +757,7 @@ static void tcp_timer_ctl(const struct ctx *c, struct tcp_conn *conn)
  * @conn:	Connection pointer
  * @flag:	Flag to set, or ~flag to unset
  */
-static void conn_flag_do(const struct ctx *c, struct tcp_conn *conn,
+static void conn_flag_do(const struct ctx *c, struct tcp_tap_conn *conn,
 			 unsigned long flag)
 {
 	if (flag & (flag - 1)) {
@@ -903,7 +795,7 @@ static void conn_flag_do(const struct ctx *c, struct tcp_conn *conn,
  * @conn:	Connection pointer
  * @event:	Connection event
  */
-static void conn_event_do(const struct ctx *c, struct tcp_conn *conn,
+static void conn_event_do(const struct ctx *c, struct tcp_tap_conn *conn,
 			  unsigned long event)
 {
 	int prev, new, num = fls(event);
@@ -963,7 +855,7 @@ static void conn_event_do(const struct ctx *c, struct tcp_conn *conn,
  *
  * Return: 1 if destination is in low RTT table, 0 otherwise
  */
-static int tcp_rtt_dst_low(const struct tcp_conn *conn)
+static int tcp_rtt_dst_low(const struct tcp_tap_conn *conn)
 {
 	int i;
 
@@ -979,7 +871,7 @@ static int tcp_rtt_dst_low(const struct tcp_conn *conn)
  * @conn:	Connection pointer
  * @tinfo:	Pointer to struct tcp_info for socket
  */
-static void tcp_rtt_dst_check(const struct tcp_conn *conn,
+static void tcp_rtt_dst_check(const struct tcp_tap_conn *conn,
 			      const struct tcp_info *tinfo)
 {
 #ifdef HAS_MIN_RTT
@@ -1016,7 +908,7 @@ static void tcp_rtt_dst_check(const struct tcp_conn *conn,
  * tcp_get_sndbuf() - Get, scale SO_SNDBUF between thresholds (1 to 0.5 usage)
  * @conn:	Connection pointer
  */
-static void tcp_get_sndbuf(struct tcp_conn *conn)
+static void tcp_get_sndbuf(struct tcp_tap_conn *conn)
 {
 	int s = conn->sock, sndbuf;
 	socklen_t sl;
@@ -1290,7 +1182,8 @@ static int tcp_opt_get(const char *opts, size_t len, uint8_t type_find,
  *
  * Return: 1 on match, 0 otherwise
  */
-static int tcp_hash_match(const struct tcp_conn *conn, int af, const void *addr,
+static int tcp_hash_match(const struct tcp_tap_conn *conn,
+			  int af, const void *addr,
 			  in_port_t tap_port, in_port_t sock_port)
 {
 	if (af == AF_INET && CONN_V4(conn)			&&
@@ -1356,7 +1249,7 @@ static unsigned int tcp_hash(const struct ctx *c, int af, const void *addr,
  * @af:		Address family, AF_INET or AF_INET6
  * @addr:	Remote address, pointer to in_addr or in6_addr
  */
-static void tcp_hash_insert(const struct ctx *c, struct tcp_conn *conn,
+static void tcp_hash_insert(const struct ctx *c, struct tcp_tap_conn *conn,
 			    int af, const void *addr)
 {
 	int b;
@@ -1374,9 +1267,9 @@ static void tcp_hash_insert(const struct ctx *c, struct tcp_conn *conn,
  * tcp_hash_remove() - Drop connection from hash table, chain unlink
  * @conn:	Connection pointer
  */
-static void tcp_hash_remove(const struct tcp_conn *conn)
+static void tcp_hash_remove(const struct tcp_tap_conn *conn)
 {
-	struct tcp_conn *entry, *prev = NULL;
+	struct tcp_tap_conn *entry, *prev = NULL;
 	int b = conn->hash_bucket;
 
 	for (entry = tc_hash[b]; entry;
@@ -1400,9 +1293,9 @@ static void tcp_hash_remove(const struct tcp_conn *conn)
  * @old:	Old connection pointer
  * @new:	New connection pointer
  */
-static void tcp_hash_update(struct tcp_conn *old, struct tcp_conn *new)
+static void tcp_hash_update(struct tcp_tap_conn *old, struct tcp_tap_conn *new)
 {
-	struct tcp_conn *entry, *prev = NULL;
+	struct tcp_tap_conn *entry, *prev = NULL;
 	int b = old->hash_bucket;
 
 	for (entry = tc_hash[b]; entry;
@@ -1431,12 +1324,13 @@ static void tcp_hash_update(struct tcp_conn *old, struct tcp_conn *new)
  *
  * Return: connection pointer, if found, -ENOENT otherwise
  */
-static struct tcp_conn *tcp_hash_lookup(const struct ctx *c, int af,
-					const void *addr,
-					in_port_t tap_port, in_port_t sock_port)
+static struct tcp_tap_conn *tcp_hash_lookup(const struct ctx *c,
+					    int af, const void *addr,
+					    in_port_t tap_port,
+					    in_port_t sock_port)
 {
 	int b = tcp_hash(c, af, addr, tap_port, sock_port);
-	struct tcp_conn *conn;
+	struct tcp_tap_conn *conn;
 
 	for (conn = tc_hash[b]; conn; conn = conn_at_idx(conn->next_index)) {
 		if (tcp_hash_match(conn, af, addr, tap_port, sock_port))
@@ -1451,9 +1345,9 @@ static struct tcp_conn *tcp_hash_lookup(const struct ctx *c, int af,
  * @c:		Execution context
  * @hole:	Pointer to recently closed connection
  */
-static void tcp_table_compact(struct ctx *c, struct tcp_conn *hole)
+static void tcp_table_compact(struct ctx *c, struct tcp_tap_conn *hole)
 {
-	struct tcp_conn *from, *to;
+	struct tcp_tap_conn *from, *to;
 
 	if (CONN_IDX(hole) == --c->tcp.conn_count) {
 		debug("TCP: hash table compaction: maximum index was %li (%p)",
@@ -1482,7 +1376,7 @@ static void tcp_table_compact(struct ctx *c, struct tcp_conn *hole)
  * @c:		Execution context
  * @conn:	Connection pointer
  */
-static void tcp_conn_destroy(struct ctx *c, struct tcp_conn *conn)
+static void tcp_conn_destroy(struct ctx *c, struct tcp_tap_conn *conn)
 {
 	close(conn->sock);
 	if (conn->timer != -1)
@@ -1492,7 +1386,7 @@ static void tcp_conn_destroy(struct ctx *c, struct tcp_conn *conn)
 	tcp_table_compact(c, conn);
 }
 
-static void tcp_rst_do(struct ctx *c, struct tcp_conn *conn);
+static void tcp_rst_do(struct ctx *c, struct tcp_tap_conn *conn);
 #define tcp_rst(c, conn)						\
 	do {								\
 		debug("TCP: index %li, reset at %s:%i", CONN_IDX(conn), \
@@ -1627,7 +1521,7 @@ void tcp_defer_handler(struct ctx *c)
 {
 	int max_conns = c->tcp.conn_count / 100 * TCP_CONN_PRESSURE;
 	int max_files = c->nofile / 100 * TCP_FILE_PRESSURE;
-	struct tcp_conn *conn;
+	struct tcp_tap_conn *conn;
 
 	tcp_l2_flags_buf_flush(c);
 	tcp_l2_data_buf_flush(c);
@@ -1656,7 +1550,7 @@ void tcp_defer_handler(struct ctx *c)
  * Return: 802.3 length, host order
  */
 static size_t tcp_l2_buf_fill_headers(const struct ctx *c,
-				      const struct tcp_conn *conn,
+				      const struct tcp_tap_conn *conn,
 				      void *p, size_t plen,
 				      const uint16_t *check, uint32_t seq)
 {
@@ -1738,7 +1632,7 @@ do {									\
  *
  * Return: 1 if sequence or window were updated, 0 otherwise
  */
-static int tcp_update_seqack_wnd(const struct ctx *c, struct tcp_conn *conn,
+static int tcp_update_seqack_wnd(const struct ctx *c, struct tcp_tap_conn *conn,
 				 int force_seq, struct tcp_info *tinfo)
 {
 	uint32_t prev_wnd_to_tap = conn->wnd_to_tap << conn->ws_to_tap;
@@ -1824,7 +1718,7 @@ out:
  *
  * Return: negative error code on connection reset, 0 otherwise
  */
-static int tcp_send_flag(struct ctx *c, struct tcp_conn *conn, int flags)
+static int tcp_send_flag(struct ctx *c, struct tcp_tap_conn *conn, int flags)
 {
 	uint32_t prev_ack_to_tap = conn->seq_ack_to_tap;
 	uint32_t prev_wnd_to_tap = conn->wnd_to_tap;
@@ -1971,7 +1865,7 @@ static int tcp_send_flag(struct ctx *c, struct tcp_conn *conn, int flags)
  * @c:		Execution context
  * @conn:	Connection pointer
  */
-static void tcp_rst_do(struct ctx *c, struct tcp_conn *conn)
+static void tcp_rst_do(struct ctx *c, struct tcp_tap_conn *conn)
 {
 	if (conn->events == CLOSED)
 		return;
@@ -1986,7 +1880,7 @@ static void tcp_rst_do(struct ctx *c, struct tcp_conn *conn)
  * @opts:	Pointer to start of TCP options
  * @optlen:	Bytes in options: caller MUST ensure available length
  */
-static void tcp_get_tap_ws(struct tcp_conn *conn,
+static void tcp_get_tap_ws(struct tcp_tap_conn *conn,
 			   const char *opts, size_t optlen)
 {
 	int ws = tcp_opt_get(opts, optlen, OPT_WS, NULL, NULL);
@@ -2003,7 +1897,7 @@ static void tcp_get_tap_ws(struct tcp_conn *conn,
  * @conn:	Connection pointer
  * @window:	Window value, host order, unscaled
  */
-static void tcp_clamp_window(const struct ctx *c, struct tcp_conn *conn,
+static void tcp_clamp_window(const struct ctx *c, struct tcp_tap_conn *conn,
 			     unsigned wnd)
 {
 	uint32_t prev_scaled = conn->wnd_from_tap << conn->ws_from_tap;
@@ -2125,7 +2019,7 @@ static int tcp_conn_new_sock(const struct ctx *c, sa_family_t af)
  * Return: clamped MSS value
  */
 static uint16_t tcp_conn_tap_mss(const struct ctx *c,
-				 const struct tcp_conn *conn,
+				 const struct tcp_tap_conn *conn,
 				 const char *opts, size_t optlen)
 {
 	unsigned int mss;
@@ -2172,7 +2066,7 @@ static void tcp_conn_from_tap(struct ctx *c, int af, const void *addr,
 		.sin6_addr = *(struct in6_addr *)addr,
 	};
 	const struct sockaddr *sa;
-	struct tcp_conn *conn;
+	struct tcp_tap_conn *conn;
 	socklen_t sl;
 	int s, mss;
 
@@ -2280,7 +2174,7 @@ static void tcp_conn_from_tap(struct ctx *c, int af, const void *addr,
  *
  * Return: 0 on success, negative error code from recv() on failure
  */
-static int tcp_sock_consume(struct tcp_conn *conn, uint32_t ack_seq)
+static int tcp_sock_consume(struct tcp_tap_conn *conn, uint32_t ack_seq)
 {
 	/* Simply ignore out-of-order ACKs: we already consumed the data we
 	 * needed from the buffer, and we won't rewind back to a lower ACK
@@ -2307,7 +2201,7 @@ static int tcp_sock_consume(struct tcp_conn *conn, uint32_t ack_seq)
  * @seq:	Sequence number to be sent
  * @now:	Current timestamp
  */
-static void tcp_data_to_tap(struct ctx *c, struct tcp_conn *conn,
+static void tcp_data_to_tap(struct ctx *c, struct tcp_tap_conn *conn,
 			    ssize_t plen, int no_csum, uint32_t seq)
 {
 	struct iovec *iov;
@@ -2344,7 +2238,7 @@ static void tcp_data_to_tap(struct ctx *c, struct tcp_conn *conn,
  *
  * #syscalls recvmsg
  */
-static int tcp_data_from_sock(struct ctx *c, struct tcp_conn *conn)
+static int tcp_data_from_sock(struct ctx *c, struct tcp_tap_conn *conn)
 {
 	uint32_t wnd_scaled = conn->wnd_from_tap << conn->ws_from_tap;
 	int fill_bufs, send_bufs = 0, last_len, iov_rem = 0;
@@ -2475,7 +2369,7 @@ zero_len:
  *
  * #syscalls sendmsg
  */
-static void tcp_data_from_tap(struct ctx *c, struct tcp_conn *conn,
+static void tcp_data_from_tap(struct ctx *c, struct tcp_tap_conn *conn,
 			      const struct pool *p)
 {
 	int i, iov_i, ack = 0, fin = 0, retr = 0, keep = -1, partial_send = 0;
@@ -2675,7 +2569,7 @@ out:
  * @opts:	Pointer to start of options
  * @optlen:	Bytes in options: caller MUST ensure available length
  */
-static void tcp_conn_from_sock_finish(struct ctx *c, struct tcp_conn *conn,
+static void tcp_conn_from_sock_finish(struct ctx *c, struct tcp_tap_conn *conn,
 				      const struct tcphdr *th,
 				      const char *opts, size_t optlen)
 {
@@ -2714,7 +2608,7 @@ static void tcp_conn_from_sock_finish(struct ctx *c, struct tcp_conn *conn,
 int tcp_tap_handler(struct ctx *c, int af, const void *addr,
 		    const struct pool *p, const struct timespec *now)
 {
-	struct tcp_conn *conn;
+	struct tcp_tap_conn *conn;
 	size_t optlen, len;
 	struct tcphdr *th;
 	int ack_due = 0;
@@ -2829,7 +2723,7 @@ int tcp_tap_handler(struct ctx *c, int af, const void *addr,
  * @c:		Execution context
  * @conn:	Connection pointer
  */
-static void tcp_connect_finish(struct ctx *c, struct tcp_conn *conn)
+static void tcp_connect_finish(struct ctx *c, struct tcp_tap_conn *conn)
 {
 	socklen_t sl;
 	int so;
@@ -2857,7 +2751,7 @@ static void tcp_conn_from_sock(struct ctx *c, union epoll_ref ref,
 			       const struct timespec *now)
 {
 	struct sockaddr_storage sa;
-	struct tcp_conn *conn;
+	struct tcp_tap_conn *conn;
 	socklen_t sl;
 	int s;
 
@@ -2949,7 +2843,7 @@ static void tcp_conn_from_sock(struct ctx *c, union epoll_ref ref,
  */
 static void tcp_timer_handler(struct ctx *c, union epoll_ref ref)
 {
-	struct tcp_conn *conn = conn_at_idx(ref.r.p.tcp.tcp.index);
+	struct tcp_tap_conn *conn = conn_at_idx(ref.r.p.tcp.tcp.index);
 	struct itimerspec check_armed = { { 0 }, { 0 } };
 
 	if (!conn)
@@ -3012,7 +2906,7 @@ static void tcp_timer_handler(struct ctx *c, union epoll_ref ref)
 void tcp_sock_handler(struct ctx *c, union epoll_ref ref, uint32_t events,
 		      const struct timespec *now)
 {
-	struct tcp_conn *conn;
+	struct tcp_tap_conn *conn;
 
 	if (ref.r.p.tcp.tcp.timer) {
 		tcp_timer_handler(c, ref);
@@ -3510,7 +3404,7 @@ static int tcp_port_rebind(void *arg)
 void tcp_timer(struct ctx *c, const struct timespec *ts)
 {
 	struct tcp_sock_refill_arg refill_arg = { c, 0 };
-	struct tcp_conn *conn;
+	struct tcp_tap_conn *conn;
 
 	(void)ts;
 
diff --git a/tcp_conn.h b/tcp_conn.h
new file mode 100644
index 0000000..db4c2d9
--- /dev/null
+++ b/tcp_conn.h
@@ -0,0 +1,168 @@
+/* SPDX-License-Identifier: AGPL-3.0-or-later
+ * Copyright Red Hat
+ * Author: Stefano Brivio <sbrivio@redhat.com>
+ * Author: David Gibson <david@gibson.dropbear.id.au>
+ *
+ * TCP connection tracking data structures, used by tcp.c and
+ * tcp_splice.c.  Shouldn't be included in non-TCP code.
+ */
+#ifndef TCP_CONN_H
+#define TCP_CONN_H
+
+#define TCP_HASH_BUCKET_BITS		(TCP_CONN_INDEX_BITS + 1)
+
+/**
+ * struct tcp_tap_conn - Descriptor for a TCP connection (not spliced)
+ * @next_index:		Connection index of next item in hash chain, -1 for none
+ * @tap_mss:		MSS advertised by tap/guest, rounded to 2 ^ TCP_MSS_BITS
+ * @sock:		Socket descriptor number
+ * @events:		Connection events, implying connection states
+ * @timer:		timerfd descriptor for timeout events
+ * @flags:		Connection flags representing internal attributes
+ * @hash_bucket:	Bucket index in connection lookup hash table
+ * @retrans:		Number of retransmissions occurred due to ACK_TIMEOUT
+ * @ws_from_tap:	Window scaling factor advertised from tap/guest
+ * @ws_to_tap:		Window scaling factor advertised to tap/guest
+ * @sndbuf:		Sending buffer in kernel, rounded to 2 ^ SNDBUF_BITS
+ * @seq_dup_ack_approx:	Last duplicate ACK number sent to tap
+ * @a.a6:		IPv6 remote address, can be IPv4-mapped
+ * @a.a4.zero:		Zero prefix for IPv4-mapped, see RFC 6890, Table 20
+ * @a.a4.one:		Ones prefix for IPv4-mapped
+ * @a.a4.a:		IPv4 address
+ * @tap_port:		Guest-facing tap port
+ * @sock_port:		Remote, socket-facing port
+ * @wnd_from_tap:	Last window size from tap, unscaled (as received)
+ * @wnd_to_tap:		Sending window advertised to tap, unscaled (as sent)
+ * @seq_to_tap:		Next sequence for packets to tap
+ * @seq_ack_from_tap:	Last ACK number received from tap
+ * @seq_from_tap:	Next sequence for packets from tap (not actually sent)
+ * @seq_ack_to_tap:	Last ACK number sent to tap
+ * @seq_init_from_tap:	Initial sequence number from tap
+ */
+struct tcp_tap_conn {
+	int	 	next_index	:TCP_CONN_INDEX_BITS + 2;
+
+#define TCP_RETRANS_BITS		3
+	unsigned int	retrans		:TCP_RETRANS_BITS;
+#define TCP_MAX_RETRANS			((1U << TCP_RETRANS_BITS) - 1)
+
+#define TCP_WS_BITS			4	/* RFC 7323 */
+#define TCP_WS_MAX			14
+	unsigned int	ws_from_tap	:TCP_WS_BITS;
+	unsigned int	ws_to_tap	:TCP_WS_BITS;
+
+
+	int		sock		:SOCKET_REF_BITS;
+
+	uint8_t		events;
+#define CLOSED			0
+#define SOCK_ACCEPTED		BIT(0)	/* implies SYN sent to tap */
+#define TAP_SYN_RCVD		BIT(1)	/* implies socket connecting */
+#define  TAP_SYN_ACK_SENT	BIT( 3)	/* implies socket connected */
+#define ESTABLISHED		BIT(2)
+#define  SOCK_FIN_RCVD		BIT( 3)
+#define  SOCK_FIN_SENT		BIT( 4)
+#define  TAP_FIN_RCVD		BIT( 5)
+#define  TAP_FIN_SENT		BIT( 6)
+#define  TAP_FIN_ACKED		BIT( 7)
+
+#define	CONN_STATE_BITS		/* Setting these clears other flags */	\
+	(SOCK_ACCEPTED | TAP_SYN_RCVD | ESTABLISHED)
+
+
+	int		timer		:SOCKET_REF_BITS;
+
+	uint8_t		flags;
+#define STALLED			BIT(0)
+#define LOCAL			BIT(1)
+#define WND_CLAMPED		BIT(2)
+#define IN_EPOLL		BIT(3)
+#define ACTIVE_CLOSE		BIT(4)
+#define ACK_TO_TAP_DUE		BIT(5)
+#define ACK_FROM_TAP_DUE	BIT(6)
+
+
+	unsigned int	hash_bucket	:TCP_HASH_BUCKET_BITS;
+
+#define TCP_MSS_BITS			14
+	unsigned int	tap_mss		:TCP_MSS_BITS;
+#define MSS_SET(conn, mss)	(conn->tap_mss = (mss >> (16 - TCP_MSS_BITS)))
+#define MSS_GET(conn)		(conn->tap_mss << (16 - TCP_MSS_BITS))
+
+
+#define SNDBUF_BITS		24
+	unsigned int	sndbuf		:SNDBUF_BITS;
+#define SNDBUF_SET(conn, bytes)	(conn->sndbuf = ((bytes) >> (32 - SNDBUF_BITS)))
+#define SNDBUF_GET(conn)	(conn->sndbuf << (32 - SNDBUF_BITS))
+
+	uint8_t		seq_dup_ack_approx;
+
+
+	union {
+		struct in6_addr a6;
+		struct {
+			uint8_t zero[10];
+			uint8_t one[2];
+			struct in_addr a;
+		} a4;
+	} a;
+
+	in_port_t	tap_port;
+	in_port_t	sock_port;
+
+	uint16_t	wnd_from_tap;
+	uint16_t	wnd_to_tap;
+
+	uint32_t	seq_to_tap;
+	uint32_t	seq_ack_from_tap;
+	uint32_t	seq_from_tap;
+	uint32_t	seq_ack_to_tap;
+	uint32_t	seq_init_from_tap;
+};
+
+/**
+ * struct tcp_splice_conn - Descriptor for a spliced TCP connection
+ * @a:			File descriptor number of socket for accepted connection
+ * @pipe_a_b:		Pipe ends for splice() from @a to @b
+ * @b:			File descriptor number of peer connected socket
+ * @pipe_b_a:		Pipe ends for splice() from @b to @a
+ * @events:		Events observed/actions performed on connection
+ * @flags:		Connection flags (attributes, not events)
+ * @a_read:		Bytes read from @a (not fully written to @b in one shot)
+ * @a_written:		Bytes written to @a (not fully written from one @b read)
+ * @b_read:		Bytes read from @b (not fully written to @a in one shot)
+ * @b_written:		Bytes written to @b (not fully written from one @a read)
+ */
+struct tcp_splice_conn {
+	int a;
+	int pipe_a_b[2];
+	int b;
+	int pipe_b_a[2];
+
+	uint8_t events;
+#define SPLICE_CLOSED			0
+#define SPLICE_CONNECT			BIT(0)
+#define SPLICE_ESTABLISHED		BIT(1)
+#define A_OUT_WAIT			BIT(2)
+#define B_OUT_WAIT			BIT(3)
+#define A_FIN_RCVD			BIT(4)
+#define B_FIN_RCVD			BIT(5)
+#define A_FIN_SENT			BIT(6)
+#define B_FIN_SENT			BIT(7)
+
+	uint8_t flags;
+#define SPLICE_V6			BIT(0)
+#define SPLICE_IN_EPOLL			BIT(1)
+#define RCVLOWAT_SET_A			BIT(2)
+#define RCVLOWAT_SET_B			BIT(3)
+#define RCVLOWAT_ACT_A			BIT(4)
+#define RCVLOWAT_ACT_B			BIT(5)
+#define CLOSING				BIT(6)
+
+	uint32_t a_read;
+	uint32_t a_written;
+	uint32_t b_read;
+	uint32_t b_written;
+};
+
+#endif /* TCP_CONN_H */
diff --git a/tcp_splice.c b/tcp_splice.c
index 4cc4ad2..cbfab01 100644
--- a/tcp_splice.c
+++ b/tcp_splice.c
@@ -21,12 +21,12 @@
  *
  * - SPLICE_CONNECT:		connection accepted, connecting to target
  * - SPLICE_ESTABLISHED:	connection to target established
- * - SPLICE_A_OUT_WAIT:		pipe to accepted socket full, wait for EPOLLOUT
- * - SPLICE_B_OUT_WAIT:		pipe to target socket full, wait for EPOLLOUT
- * - SPLICE_A_FIN_RCVD:		FIN (EPOLLRDHUP) seen from accepted socket
- * - SPLICE_B_FIN_RCVD:		FIN (EPOLLRDHUP) seen from target socket
- * - SPLICE_A_FIN_RCVD:		FIN (write shutdown) sent to accepted socket
- * - SPLICE_B_FIN_RCVD:		FIN (write shutdown) sent to target socket
+ * - A_OUT_WAIT:		pipe to accepted socket full, wait for EPOLLOUT
+ * - B_OUT_WAIT:		pipe to target socket full, wait for EPOLLOUT
+ * - A_FIN_RCVD:		FIN (EPOLLRDHUP) seen from accepted socket
+ * - B_FIN_RCVD:		FIN (EPOLLRDHUP) seen from target socket
+ * - A_FIN_SENT:		FIN (write shutdown) sent to accepted socket
+ * - B_FIN_SENT:		FIN (write shutdown) sent to target socket
  *
  * #syscalls:pasta pipe2|pipe fcntl armv6l:fcntl64 armv7l:fcntl64 ppc64:fcntl64
  */
@@ -52,6 +52,8 @@
 #include "log.h"
 #include "tcp_splice.h"
 
+#include "tcp_conn.h"
+
 #define MAX_PIPE_SIZE			(8UL * 1024 * 1024)
 #define TCP_SPLICE_PIPE_POOL_SIZE	16
 #define TCP_SPLICE_CONN_PRESSURE	30	/* % of splice_conn_count */
@@ -66,52 +68,7 @@ extern int ns_sock_pool6		[TCP_SOCK_POOL_SIZE];
 /* Pool of pre-opened pipes */
 static int splice_pipe_pool		[TCP_SPLICE_PIPE_POOL_SIZE][2][2];
 
-/**
- * struct tcp_splice_conn - Descriptor for a spliced TCP connection
- * @a:			File descriptor number of socket for accepted connection
- * @pipe_a_b:		Pipe ends for splice() from @a to @b
- * @b:			File descriptor number of peer connected socket
- * @pipe_b_a:		Pipe ends for splice() from @b to @a
- * @events:		Events observed/actions performed on connection
- * @flags:		Connection flags (attributes, not events)
- * @a_read:		Bytes read from @a (not fully written to @b in one shot)
- * @a_written:		Bytes written to @a (not fully written from one @b read)
- * @b_read:		Bytes read from @b (not fully written to @a in one shot)
- * @b_written:		Bytes written to @b (not fully written from one @a read)
-*/
-struct tcp_splice_conn {
-	int a;
-	int pipe_a_b[2];
-	int b;
-	int pipe_b_a[2];
-
-	uint8_t events;
-#define CLOSED				0
-#define CONNECT				BIT(0)
-#define ESTABLISHED			BIT(1)
-#define A_OUT_WAIT			BIT(2)
-#define B_OUT_WAIT			BIT(3)
-#define A_FIN_RCVD			BIT(4)
-#define B_FIN_RCVD			BIT(5)
-#define A_FIN_SENT			BIT(6)
-#define B_FIN_SENT			BIT(7)
-
-	uint8_t flags;
-#define SOCK_V6				BIT(0)
-#define IN_EPOLL			BIT(1)
-#define RCVLOWAT_SET_A			BIT(2)
-#define RCVLOWAT_SET_B			BIT(3)
-#define RCVLOWAT_ACT_A			BIT(4)
-#define RCVLOWAT_ACT_B			BIT(5)
-#define CLOSING				BIT(6)
-
-	uint32_t a_read;
-	uint32_t a_written;
-	uint32_t b_read;
-	uint32_t b_written;
-};
-
-#define CONN_V6(x)			(x->flags & SOCK_V6)
+#define CONN_V6(x)			(x->flags & SPLICE_V6)
 #define CONN_V4(x)			(!CONN_V6(x))
 #define CONN_HAS(conn, set)		((conn->events & (set)) == (set))
 #define CONN(index)			(tc_splice + (index))
@@ -122,13 +79,13 @@ static struct tcp_splice_conn tc_splice[TCP_SPLICE_MAX_CONNS];
 
 /* Display strings for connection events */
 static const char *tcp_splice_event_str[] __attribute((__unused__)) = {
-	"CONNECT", "ESTABLISHED", "A_OUT_WAIT", "B_OUT_WAIT",
+	"SPLICE_CONNECT", "SPLICE_ESTABLISHED", "A_OUT_WAIT", "B_OUT_WAIT",
 	"A_FIN_RCVD", "B_FIN_RCVD", "A_FIN_SENT", "B_FIN_SENT",
 };
 
 /* Display strings for connection flags */
 static const char *tcp_splice_flag_str[] __attribute((__unused__)) = {
-	"SOCK_V6", "IN_EPOLL", "RCVLOWAT_SET_A", "RCVLOWAT_SET_B",
+	"SPLICE_V6", "SPLICE_IN_EPOLL", "RCVLOWAT_SET_A", "RCVLOWAT_SET_B",
 	"RCVLOWAT_ACT_A", "RCVLOWAT_ACT_B", "CLOSING",
 };
 
@@ -143,12 +100,12 @@ static void tcp_splice_conn_epoll_events(uint16_t events,
 {
 	*a = *b = 0;
 
-	if (events & ESTABLISHED) {
+	if (events & SPLICE_ESTABLISHED) {
 		if (!(events & B_FIN_SENT))
 			*a = EPOLLIN | EPOLLRDHUP;
 		if (!(events & A_FIN_SENT))
 			*b = EPOLLIN | EPOLLRDHUP;
-	} else if (events & CONNECT) {
+	} else if (events & SPLICE_CONNECT) {
 		*b = EPOLLOUT;
 	}
 
@@ -210,7 +167,7 @@ static void conn_flag_do(const struct ctx *c, struct tcp_splice_conn *conn,
 static int tcp_splice_epoll_ctl(const struct ctx *c,
 				struct tcp_splice_conn *conn)
 {
-	int m = (conn->flags & IN_EPOLL) ? EPOLL_CTL_MOD : EPOLL_CTL_ADD;
+	int m = (conn->flags & SPLICE_IN_EPOLL) ? EPOLL_CTL_MOD : EPOLL_CTL_ADD;
 	union epoll_ref ref_a = { .r.proto = IPPROTO_TCP, .r.s = conn->a,
 				  .r.p.tcp.tcp.splice = 1,
 				  .r.p.tcp.tcp.index = CONN_IDX(conn),
@@ -234,7 +191,7 @@ static int tcp_splice_epoll_ctl(const struct ctx *c,
 	    epoll_ctl(c->epollfd, m, conn->b, &ev_b))
 		goto delete;
 
-	conn->flags |= IN_EPOLL;		/* No need to log this */
+	conn->flags |= SPLICE_IN_EPOLL;		/* No need to log this */
 
 	return 0;
 
@@ -323,7 +280,7 @@ static void tcp_table_splice_compact(struct ctx *c,
  */
 static void tcp_splice_destroy(struct ctx *c, struct tcp_splice_conn *conn)
 {
-	if (conn->events & ESTABLISHED) {
+	if (conn->events & SPLICE_ESTABLISHED) {
 		/* Flushing might need to block: don't recycle them. */
 		if (conn->pipe_a_b[0] != -1) {
 			close(conn->pipe_a_b[0]);
@@ -337,7 +294,7 @@ static void tcp_splice_destroy(struct ctx *c, struct tcp_splice_conn *conn)
 		}
 	}
 
-	if (conn->events & CONNECT) {
+	if (conn->events & SPLICE_CONNECT) {
 		close(conn->b);
 		conn->b = -1;
 	}
@@ -346,7 +303,7 @@ static void tcp_splice_destroy(struct ctx *c, struct tcp_splice_conn *conn)
 	conn->a = -1;
 	conn->a_read = conn->a_written = conn->b_read = conn->b_written = 0;
 
-	conn->events = CLOSED;
+	conn->events = SPLICE_CLOSED;
 	conn->flags = 0;
 	debug("TCP (spliced): index %li, CLOSED", CONN_IDX(conn));
 
@@ -397,8 +354,8 @@ static int tcp_splice_connect_finish(const struct ctx *c,
 		}
 	}
 
-	if (!(conn->events & ESTABLISHED))
-		conn_event(c, conn, ESTABLISHED);
+	if (!(conn->events & SPLICE_ESTABLISHED))
+		conn_event(c, conn, SPLICE_ESTABLISHED);
 
 	return 0;
 }
@@ -466,9 +423,9 @@ static int tcp_splice_connect(const struct ctx *c, struct tcp_splice_conn *conn,
 			close(sock_conn);
 			return ret;
 		}
-		conn_event(c, conn, CONNECT);
+		conn_event(c, conn, SPLICE_CONNECT);
 	} else {
-		conn_event(c, conn, ESTABLISHED);
+		conn_event(c, conn, SPLICE_ESTABLISHED);
 		return tcp_splice_connect_finish(c, conn);
 	}
 
@@ -598,7 +555,7 @@ void tcp_sock_handler_splice(struct ctx *c, union epoll_ref ref,
 
 		conn = CONN(c->tcp.splice_conn_count++);
 		conn->a = s;
-		conn->flags = ref.r.p.tcp.tcp.v6 ? SOCK_V6 : 0;
+		conn->flags = ref.r.p.tcp.tcp.v6 ? SPLICE_V6 : 0;
 
 		if (tcp_splice_new(c, conn, ref.r.p.tcp.tcp.index,
 				   ref.r.p.tcp.tcp.outbound))
@@ -609,13 +566,13 @@ void tcp_sock_handler_splice(struct ctx *c, union epoll_ref ref,
 
 	conn = CONN(ref.r.p.tcp.tcp.index);
 
-	if (conn->events == CLOSED)
+	if (conn->events == SPLICE_CLOSED)
 		return;
 
 	if (events & EPOLLERR)
 		goto close;
 
-	if (conn->events == CONNECT) {
+	if (conn->events == SPLICE_CONNECT) {
 		if (!(events & EPOLLOUT))
 			goto close;
 		if (tcp_splice_connect_finish(c, conn))
-- 
@@ -21,12 +21,12 @@
  *
  * - SPLICE_CONNECT:		connection accepted, connecting to target
  * - SPLICE_ESTABLISHED:	connection to target established
- * - SPLICE_A_OUT_WAIT:		pipe to accepted socket full, wait for EPOLLOUT
- * - SPLICE_B_OUT_WAIT:		pipe to target socket full, wait for EPOLLOUT
- * - SPLICE_A_FIN_RCVD:		FIN (EPOLLRDHUP) seen from accepted socket
- * - SPLICE_B_FIN_RCVD:		FIN (EPOLLRDHUP) seen from target socket
- * - SPLICE_A_FIN_RCVD:		FIN (write shutdown) sent to accepted socket
- * - SPLICE_B_FIN_RCVD:		FIN (write shutdown) sent to target socket
+ * - A_OUT_WAIT:		pipe to accepted socket full, wait for EPOLLOUT
+ * - B_OUT_WAIT:		pipe to target socket full, wait for EPOLLOUT
+ * - A_FIN_RCVD:		FIN (EPOLLRDHUP) seen from accepted socket
+ * - B_FIN_RCVD:		FIN (EPOLLRDHUP) seen from target socket
+ * - A_FIN_SENT:		FIN (write shutdown) sent to accepted socket
+ * - B_FIN_SENT:		FIN (write shutdown) sent to target socket
  *
  * #syscalls:pasta pipe2|pipe fcntl armv6l:fcntl64 armv7l:fcntl64 ppc64:fcntl64
  */
@@ -52,6 +52,8 @@
 #include "log.h"
 #include "tcp_splice.h"
 
+#include "tcp_conn.h"
+
 #define MAX_PIPE_SIZE			(8UL * 1024 * 1024)
 #define TCP_SPLICE_PIPE_POOL_SIZE	16
 #define TCP_SPLICE_CONN_PRESSURE	30	/* % of splice_conn_count */
@@ -66,52 +68,7 @@ extern int ns_sock_pool6		[TCP_SOCK_POOL_SIZE];
 /* Pool of pre-opened pipes */
 static int splice_pipe_pool		[TCP_SPLICE_PIPE_POOL_SIZE][2][2];
 
-/**
- * struct tcp_splice_conn - Descriptor for a spliced TCP connection
- * @a:			File descriptor number of socket for accepted connection
- * @pipe_a_b:		Pipe ends for splice() from @a to @b
- * @b:			File descriptor number of peer connected socket
- * @pipe_b_a:		Pipe ends for splice() from @b to @a
- * @events:		Events observed/actions performed on connection
- * @flags:		Connection flags (attributes, not events)
- * @a_read:		Bytes read from @a (not fully written to @b in one shot)
- * @a_written:		Bytes written to @a (not fully written from one @b read)
- * @b_read:		Bytes read from @b (not fully written to @a in one shot)
- * @b_written:		Bytes written to @b (not fully written from one @a read)
-*/
-struct tcp_splice_conn {
-	int a;
-	int pipe_a_b[2];
-	int b;
-	int pipe_b_a[2];
-
-	uint8_t events;
-#define CLOSED				0
-#define CONNECT				BIT(0)
-#define ESTABLISHED			BIT(1)
-#define A_OUT_WAIT			BIT(2)
-#define B_OUT_WAIT			BIT(3)
-#define A_FIN_RCVD			BIT(4)
-#define B_FIN_RCVD			BIT(5)
-#define A_FIN_SENT			BIT(6)
-#define B_FIN_SENT			BIT(7)
-
-	uint8_t flags;
-#define SOCK_V6				BIT(0)
-#define IN_EPOLL			BIT(1)
-#define RCVLOWAT_SET_A			BIT(2)
-#define RCVLOWAT_SET_B			BIT(3)
-#define RCVLOWAT_ACT_A			BIT(4)
-#define RCVLOWAT_ACT_B			BIT(5)
-#define CLOSING				BIT(6)
-
-	uint32_t a_read;
-	uint32_t a_written;
-	uint32_t b_read;
-	uint32_t b_written;
-};
-
-#define CONN_V6(x)			(x->flags & SOCK_V6)
+#define CONN_V6(x)			(x->flags & SPLICE_V6)
 #define CONN_V4(x)			(!CONN_V6(x))
 #define CONN_HAS(conn, set)		((conn->events & (set)) == (set))
 #define CONN(index)			(tc_splice + (index))
@@ -122,13 +79,13 @@ static struct tcp_splice_conn tc_splice[TCP_SPLICE_MAX_CONNS];
 
 /* Display strings for connection events */
 static const char *tcp_splice_event_str[] __attribute((__unused__)) = {
-	"CONNECT", "ESTABLISHED", "A_OUT_WAIT", "B_OUT_WAIT",
+	"SPLICE_CONNECT", "SPLICE_ESTABLISHED", "A_OUT_WAIT", "B_OUT_WAIT",
 	"A_FIN_RCVD", "B_FIN_RCVD", "A_FIN_SENT", "B_FIN_SENT",
 };
 
 /* Display strings for connection flags */
 static const char *tcp_splice_flag_str[] __attribute((__unused__)) = {
-	"SOCK_V6", "IN_EPOLL", "RCVLOWAT_SET_A", "RCVLOWAT_SET_B",
+	"SPLICE_V6", "SPLICE_IN_EPOLL", "RCVLOWAT_SET_A", "RCVLOWAT_SET_B",
 	"RCVLOWAT_ACT_A", "RCVLOWAT_ACT_B", "CLOSING",
 };
 
@@ -143,12 +100,12 @@ static void tcp_splice_conn_epoll_events(uint16_t events,
 {
 	*a = *b = 0;
 
-	if (events & ESTABLISHED) {
+	if (events & SPLICE_ESTABLISHED) {
 		if (!(events & B_FIN_SENT))
 			*a = EPOLLIN | EPOLLRDHUP;
 		if (!(events & A_FIN_SENT))
 			*b = EPOLLIN | EPOLLRDHUP;
-	} else if (events & CONNECT) {
+	} else if (events & SPLICE_CONNECT) {
 		*b = EPOLLOUT;
 	}
 
@@ -210,7 +167,7 @@ static void conn_flag_do(const struct ctx *c, struct tcp_splice_conn *conn,
 static int tcp_splice_epoll_ctl(const struct ctx *c,
 				struct tcp_splice_conn *conn)
 {
-	int m = (conn->flags & IN_EPOLL) ? EPOLL_CTL_MOD : EPOLL_CTL_ADD;
+	int m = (conn->flags & SPLICE_IN_EPOLL) ? EPOLL_CTL_MOD : EPOLL_CTL_ADD;
 	union epoll_ref ref_a = { .r.proto = IPPROTO_TCP, .r.s = conn->a,
 				  .r.p.tcp.tcp.splice = 1,
 				  .r.p.tcp.tcp.index = CONN_IDX(conn),
@@ -234,7 +191,7 @@ static int tcp_splice_epoll_ctl(const struct ctx *c,
 	    epoll_ctl(c->epollfd, m, conn->b, &ev_b))
 		goto delete;
 
-	conn->flags |= IN_EPOLL;		/* No need to log this */
+	conn->flags |= SPLICE_IN_EPOLL;		/* No need to log this */
 
 	return 0;
 
@@ -323,7 +280,7 @@ static void tcp_table_splice_compact(struct ctx *c,
  */
 static void tcp_splice_destroy(struct ctx *c, struct tcp_splice_conn *conn)
 {
-	if (conn->events & ESTABLISHED) {
+	if (conn->events & SPLICE_ESTABLISHED) {
 		/* Flushing might need to block: don't recycle them. */
 		if (conn->pipe_a_b[0] != -1) {
 			close(conn->pipe_a_b[0]);
@@ -337,7 +294,7 @@ static void tcp_splice_destroy(struct ctx *c, struct tcp_splice_conn *conn)
 		}
 	}
 
-	if (conn->events & CONNECT) {
+	if (conn->events & SPLICE_CONNECT) {
 		close(conn->b);
 		conn->b = -1;
 	}
@@ -346,7 +303,7 @@ static void tcp_splice_destroy(struct ctx *c, struct tcp_splice_conn *conn)
 	conn->a = -1;
 	conn->a_read = conn->a_written = conn->b_read = conn->b_written = 0;
 
-	conn->events = CLOSED;
+	conn->events = SPLICE_CLOSED;
 	conn->flags = 0;
 	debug("TCP (spliced): index %li, CLOSED", CONN_IDX(conn));
 
@@ -397,8 +354,8 @@ static int tcp_splice_connect_finish(const struct ctx *c,
 		}
 	}
 
-	if (!(conn->events & ESTABLISHED))
-		conn_event(c, conn, ESTABLISHED);
+	if (!(conn->events & SPLICE_ESTABLISHED))
+		conn_event(c, conn, SPLICE_ESTABLISHED);
 
 	return 0;
 }
@@ -466,9 +423,9 @@ static int tcp_splice_connect(const struct ctx *c, struct tcp_splice_conn *conn,
 			close(sock_conn);
 			return ret;
 		}
-		conn_event(c, conn, CONNECT);
+		conn_event(c, conn, SPLICE_CONNECT);
 	} else {
-		conn_event(c, conn, ESTABLISHED);
+		conn_event(c, conn, SPLICE_ESTABLISHED);
 		return tcp_splice_connect_finish(c, conn);
 	}
 
@@ -598,7 +555,7 @@ void tcp_sock_handler_splice(struct ctx *c, union epoll_ref ref,
 
 		conn = CONN(c->tcp.splice_conn_count++);
 		conn->a = s;
-		conn->flags = ref.r.p.tcp.tcp.v6 ? SOCK_V6 : 0;
+		conn->flags = ref.r.p.tcp.tcp.v6 ? SPLICE_V6 : 0;
 
 		if (tcp_splice_new(c, conn, ref.r.p.tcp.tcp.index,
 				   ref.r.p.tcp.tcp.outbound))
@@ -609,13 +566,13 @@ void tcp_sock_handler_splice(struct ctx *c, union epoll_ref ref,
 
 	conn = CONN(ref.r.p.tcp.tcp.index);
 
-	if (conn->events == CLOSED)
+	if (conn->events == SPLICE_CLOSED)
 		return;
 
 	if (events & EPOLLERR)
 		goto close;
 
-	if (conn->events == CONNECT) {
+	if (conn->events == SPLICE_CONNECT) {
 		if (!(events & EPOLLOUT))
 			goto close;
 		if (tcp_splice_connect_finish(c, conn))
-- 
2.38.1


  parent reply	other threads:[~2022-11-16  4:42 UTC|newest]

Thread overview: 57+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-11-16  4:41 [PATCH 00/32] Use dual stack sockets to listen for inbound TCP connections David Gibson
2022-11-16  4:41 ` [PATCH 01/32] clang-tidy: Suppress warning about assignments in if statements David Gibson
2022-11-16 23:10   ` Stefano Brivio
2022-11-17  1:20     ` David Gibson
2022-11-16  4:41 ` [PATCH 02/32] style: Minor corrections to function comments David Gibson
2022-11-16 23:11   ` Stefano Brivio
2022-11-17  1:21     ` David Gibson
2022-11-16  4:41 ` [PATCH 03/32] tcp_splice: #include tcp_splice.h in tcp_splice.c David Gibson
2022-11-16  4:41 ` [PATCH 04/32] tcp: Remove unused TCP_MAX_SOCKS constant David Gibson
2022-11-16  4:41 ` [PATCH 05/32] tcp: Better helpers for converting between connection pointer and index David Gibson
2022-11-16 23:11   ` Stefano Brivio
2022-11-17  1:24     ` David Gibson
2022-11-16  4:41 ` [PATCH 06/32] tcp_splice: Helpers for converting from index to/from tcp_splice_conn David Gibson
2022-11-16  4:41 ` David Gibson [this message]
2022-11-16  4:41 ` [PATCH 08/32] tcp: Add connection union type David Gibson
2022-11-16  4:41 ` [PATCH 09/32] tcp: Improved helpers to update connections after moving David Gibson
2022-11-16  4:41 ` [PATCH 10/32] tcp: Unify spliced and non-spliced connection tables David Gibson
2022-11-16  4:41 ` [PATCH 11/32] tcp: Unify tcp_defer_handler and tcp_splice_defer_handler() David Gibson
2022-11-16  4:41 ` [PATCH 12/32] tcp: Partially unify tcp_timer() and tcp_splice_timer() David Gibson
2022-11-16  4:41 ` [PATCH 13/32] tcp: Unify the IN_EPOLL flag David Gibson
2022-11-16  4:41 ` [PATCH 14/32] tcp: Separate helpers to create ns listening sockets David Gibson
2022-11-16 23:51   ` Stefano Brivio
2022-11-17  1:32     ` David Gibson
2022-11-16  4:41 ` [PATCH 15/32] tcp: Unify part of spliced and non-spliced conn_from_sock path David Gibson
2022-11-16 23:53   ` Stefano Brivio
2022-11-17  1:37     ` David Gibson
2022-11-17  7:30       ` Stefano Brivio
2022-11-17  8:58         ` David Gibson
2022-11-16  4:41 ` [PATCH 16/32] tcp: Use the same sockets to listen for spliced and non-spliced connections David Gibson
2022-11-16 23:54   ` Stefano Brivio
2022-11-17  1:43     ` David Gibson
2022-11-16  4:41 ` [PATCH 17/32] tcp: Remove splice from tcp_epoll_ref David Gibson
2022-11-16  4:41 ` [PATCH 18/32] tcp: Don't store hash bucket in connection structures David Gibson
2022-11-16  4:41 ` [PATCH 19/32] inany: Helper functions for handling addresses which could be IPv4 or IPv6 David Gibson
2022-11-16 23:54   ` Stefano Brivio
2022-11-17  1:48     ` David Gibson
2022-11-16  4:42 ` [PATCH 20/32] tcp: Hash IPv4 and IPv4-mapped-IPv6 addresses the same David Gibson
2022-11-16  4:42 ` [PATCH 21/32] tcp: Take tcp_hash_insert() address from struct tcp_conn David Gibson
2022-11-16  4:42 ` [PATCH 22/32] tcp: Simplify tcp_hash_match() to take an inany_addr David Gibson
2022-11-16  4:42 ` [PATCH 23/32] tcp: Unify initial sequence number calculation for IPv4 and IPv6 David Gibson
2022-11-16  4:42 ` [PATCH 24/32] tcp: Have tcp_seq_init() take its parameters from struct tcp_conn David Gibson
2022-11-16  4:42 ` [PATCH 25/32] tcp: Fix small errors in tcp_seq_init() time handling David Gibson
2022-11-16  4:42 ` [PATCH 26/32] tcp: Remove v6 flag from tcp_epoll_ref David Gibson
2022-11-17  0:15   ` Stefano Brivio
2022-11-17  1:50     ` David Gibson
2022-11-16  4:42 ` [PATCH 27/32] tcp: NAT IPv4-mapped IPv6 addresses like IPv4 addresses David Gibson
2022-11-17  0:15   ` Stefano Brivio
2022-11-17  2:00     ` David Gibson
2022-11-16  4:42 ` [PATCH 28/32] tcp_splice: Allow splicing of connections from IPv4-mapped loopback David Gibson
2022-11-17  0:15   ` Stefano Brivio
2022-11-17  2:05     ` David Gibson
2022-11-16  4:42 ` [PATCH 29/32] tcp: Consolidate tcp_sock_init[46] David Gibson
2022-11-16  4:42 ` [PATCH 30/32] util: Allow sock_l4() to open dual stack sockets David Gibson
2022-11-16  4:42 ` [PATCH 31/32] util: Always return -1 on error in sock_l4() David Gibson
2022-11-16  4:42 ` [PATCH 32/32] tcp: Use dual stack sockets for port forwarding when possible David Gibson
2022-11-17  0:15   ` Stefano Brivio
2022-11-17  2:08     ` David Gibson

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20221116044212.3876516-8-david@gibson.dropbear.id.au \
    --to=david@gibson.dropbear.id.au \
    --cc=passt-dev@passt.top \
    --cc=sbrivio@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
Code repositories for project(s) associated with this public inbox

	https://passt.top/passt

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for IMAP folder(s).