public inbox for passt-dev@passt.top
 help / color / mirror / code / Atom feed
* [PATCH v8 0/6] Retry SYNs for inbound connections
@ 2025-11-10  9:31 Yumei Huang
  2025-11-10  9:31 ` [PATCH v8 1/6] tcp: Rename "retrans" to "retries" Yumei Huang
                   ` (5 more replies)
  0 siblings, 6 replies; 32+ messages in thread
From: Yumei Huang @ 2025-11-10  9:31 UTC (permalink / raw)
  To: passt-dev, sbrivio; +Cc: david, yuhuang

When a client connects, SYN would be sent to guest only once. If the
guest is not connected or ready at that time, the connection will be
reset in 10s. These patches introduce the SYN retry mechanism using
the similar backoff timeout as linux kernel. Also update the data
retransmission timeout using the backoff timeout.

v8:
    - Remove the TCP_/tcp_ suffix from certain macro and struct member names
    - Add parameter struct ctx *c to tcp_timer_ctl()
    - Modify rto_max type from size_t to int
    - Clamp syn_retries and syn_linear_timeouts with MAX_SYNCNT(127)
    - Some other minor fixes related to wording and formatting

v7:
    - Update read_file() and read_file_integer()
    - Rename tcp_syn_params_init() to tcp_get_rto_params()
    - Modify the implementation of the timeout assignment
    - Add a patch to clamp the retry timeout

Yumei Huang (6):
  tcp: Rename "retrans" to "retries"
  util: Introduce read_file() and read_file_integer() function
  tcp: Add parameter struct ctx *c to tcp_timer_ctl()
  tcp: Resend SYN for inbound connections
  tcp: Update data retransmission timeout
  tcp: Clamp the retry timeout

 tcp.c      | 107 +++++++++++++++++++++++++++++++++++++++--------------
 tcp.h      |   6 +++
 tcp_conn.h |  13 ++++---
 util.c     |  86 ++++++++++++++++++++++++++++++++++++++++++
 util.h     |   2 +
 5 files changed, 181 insertions(+), 33 deletions(-)

-- 
2.51.0


^ permalink raw reply	[flat|nested] 32+ messages in thread

* [PATCH v8 1/6] tcp: Rename "retrans" to "retries"
  2025-11-10  9:31 [PATCH v8 0/6] Retry SYNs for inbound connections Yumei Huang
@ 2025-11-10  9:31 ` Yumei Huang
  2025-11-10  9:31 ` [PATCH v8 2/6] util: Introduce read_file() and read_file_integer() function Yumei Huang
                   ` (4 subsequent siblings)
  5 siblings, 0 replies; 32+ messages in thread
From: Yumei Huang @ 2025-11-10  9:31 UTC (permalink / raw)
  To: passt-dev, sbrivio; +Cc: david, yuhuang

Rename "retrans" to "retries" so it can be used for SYN retries.

Signed-off-by: Yumei Huang <yuhuang@redhat.com>
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
---
 tcp.c      | 12 ++++++------
 tcp_conn.h | 12 ++++++------
 2 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/tcp.c b/tcp.c
index e91c0cf..ca3d742 100644
--- a/tcp.c
+++ b/tcp.c
@@ -186,7 +186,7 @@
  * - ACK_TIMEOUT: if no ACK segment was received from tap/guest, after sending
  *   data (flag ACK_FROM_TAP_DUE with ESTABLISHED event), re-send data from the
  *   socket and reset sequence to what was acknowledged. If this persists for
- *   more than TCP_MAX_RETRANS times in a row, reset the connection
+ *   more than TCP_MAX_RETRIES times in a row, reset the connection
  *
  * - FIN_TIMEOUT: if a FIN segment was sent to tap/guest (flag ACK_FROM_TAP_DUE
  *   with TAP_FIN_SENT event), and no ACK is received within this time, reset
@@ -1140,7 +1140,7 @@ static void tcp_update_seqack_from_tap(const struct ctx *c,
 		if (SEQ_LT(seq, conn->seq_to_tap))
 			conn_flag(c, conn, ACK_FROM_TAP_DUE);
 
-		conn->retrans = 0;
+		conn->retries = 0;
 		conn->seq_ack_from_tap = seq;
 	}
 }
@@ -2429,7 +2429,7 @@ void tcp_timer_handler(const struct ctx *c, union epoll_ref ref)
 		} else if (CONN_HAS(conn, SOCK_FIN_SENT | TAP_FIN_ACKED)) {
 			flow_dbg(conn, "FIN timeout");
 			tcp_rst(c, conn);
-		} else if (conn->retrans == TCP_MAX_RETRANS) {
+		} else if (conn->retries == TCP_MAX_RETRIES) {
 			flow_dbg(conn, "retransmissions count exceeded");
 			tcp_rst(c, conn);
 		} else {
@@ -2438,7 +2438,7 @@ void tcp_timer_handler(const struct ctx *c, union epoll_ref ref)
 			if (!conn->wnd_from_tap)
 				conn->wnd_from_tap = 1; /* Zero-window probe */
 
-			conn->retrans++;
+			conn->retries++;
 			if (tcp_rewind_seq(c, conn))
 				return;
 
@@ -3400,7 +3400,7 @@ static int tcp_flow_repair_opt(const struct tcp_tap_conn *conn,
 int tcp_flow_migrate_source(int fd, struct tcp_tap_conn *conn)
 {
 	struct tcp_tap_transfer t = {
-		.retrans		= conn->retrans,
+		.retries		= conn->retries,
 		.ws_from_tap		= conn->ws_from_tap,
 		.ws_to_tap		= conn->ws_to_tap,
 		.events			= conn->events,
@@ -3680,7 +3680,7 @@ int tcp_flow_migrate_target(struct ctx *c, int fd)
 	memcpy(&flow->f.side, &t.side, sizeof(flow->f.side));
 	conn = FLOW_SET_TYPE(flow, FLOW_TCP, tcp);
 
-	conn->retrans			= t.retrans;
+	conn->retries			= t.retries;
 	conn->ws_from_tap		= t.ws_from_tap;
 	conn->ws_to_tap			= t.ws_to_tap;
 	conn->events			= t.events;
diff --git a/tcp_conn.h b/tcp_conn.h
index 8133312..923af36 100644
--- a/tcp_conn.h
+++ b/tcp_conn.h
@@ -12,7 +12,7 @@
 /**
  * struct tcp_tap_conn - Descriptor for a TCP connection (not spliced)
  * @f:			Generic flow information
- * @retrans:		Number of retransmissions occurred due to ACK_TIMEOUT
+ * @retries:		Number of retries occurred due to timeouts
  * @ws_from_tap:	Window scaling factor advertised from tap/guest
  * @ws_to_tap:		Window scaling factor advertised to tap/guest
  * @tap_mss:		MSS advertised by tap/guest, rounded to 2 ^ TCP_MSS_BITS
@@ -35,9 +35,9 @@ struct tcp_tap_conn {
 	/* Must be first element */
 	struct flow_common f;
 
-#define TCP_RETRANS_BITS		3
-	unsigned int	retrans		:TCP_RETRANS_BITS;
-#define TCP_MAX_RETRANS			MAX_FROM_BITS(TCP_RETRANS_BITS)
+#define TCP_RETRIES_BITS		3
+	unsigned int	retries		:TCP_RETRIES_BITS;
+#define TCP_MAX_RETRIES			MAX_FROM_BITS(TCP_RETRIES_BITS)
 
 #define TCP_WS_BITS			4	/* RFC 7323 */
 #define TCP_WS_MAX			14
@@ -99,7 +99,7 @@ struct tcp_tap_conn {
  * struct tcp_tap_transfer - Migrated TCP data, flow table part, network order
  * @pif:		Interfaces for each side of the flow
  * @side:		Addresses and ports for each side of the flow
- * @retrans:		Number of retransmissions occurred due to ACK_TIMEOUT
+ * @retries:		Number of retries occurred due to timeouts
  * @ws_from_tap:	Window scaling factor advertised from tap/guest
  * @ws_to_tap:		Window scaling factor advertised to tap/guest
  * @events:		Connection events, implying connection states
@@ -119,7 +119,7 @@ struct tcp_tap_transfer {
 	uint8_t		pif[SIDES];
 	struct flowside	side[SIDES];
 
-	uint8_t		retrans;
+	uint8_t		retries;
 	uint8_t		ws_from_tap;
 	uint8_t		ws_to_tap;
 	uint8_t		events;
-- 
2.51.0


^ permalink raw reply	[flat|nested] 32+ messages in thread

* [PATCH v8 2/6] util: Introduce read_file() and read_file_integer() function
  2025-11-10  9:31 [PATCH v8 0/6] Retry SYNs for inbound connections Yumei Huang
  2025-11-10  9:31 ` [PATCH v8 1/6] tcp: Rename "retrans" to "retries" Yumei Huang
@ 2025-11-10  9:31 ` Yumei Huang
  2025-11-14  0:01   ` Stefano Brivio
  2025-11-18  0:19   ` Stefano Brivio
  2025-11-10  9:31 ` [PATCH v8 3/6] tcp: Add parameter struct ctx *c to tcp_timer_ctl() Yumei Huang
                   ` (3 subsequent siblings)
  5 siblings, 2 replies; 32+ messages in thread
From: Yumei Huang @ 2025-11-10  9:31 UTC (permalink / raw)
  To: passt-dev, sbrivio; +Cc: david, yuhuang

Signed-off-by: Yumei Huang <yuhuang@redhat.com>
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
---
 util.c | 86 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 util.h |  2 ++
 2 files changed, 88 insertions(+)

diff --git a/util.c b/util.c
index 44c21a3..c4c849c 100644
--- a/util.c
+++ b/util.c
@@ -590,6 +590,92 @@ int write_file(const char *path, const char *buf)
 	return len == 0 ? 0 : -1;
 }
 
+/**
+ * read_file() - Read contents of file into a NULL-terminated buffer
+ * @path:	Path to file to read
+ * @buf:	Buffer to store file contents
+ * @buf_size:	Size of buffer
+ *
+ * Return: number of bytes read on success, -1 on error, -ENOBUFS on truncation
+ */
+ssize_t read_file(const char *path, char *buf, size_t buf_size)
+{
+	int fd = open(path, O_RDONLY | O_CLOEXEC);
+	size_t total_read = 0;
+	ssize_t rc;
+
+	if (fd < 0) {
+		warn_perror("Could not open %s", path);
+		return -1;
+	}
+
+	while (total_read < buf_size) {
+		rc = read(fd, buf + total_read, buf_size - total_read);
+
+		if (rc < 0) {
+			warn_perror("Couldn't read from %s", path);
+			close(fd);
+			return -1;
+		}
+
+		if (rc == 0)
+			break;
+
+		total_read += rc;
+	}
+
+	close(fd);
+
+	if (total_read == buf_size) {
+		warn("File %s contents exceed buffer size %zu", path,
+		     buf_size);
+		buf[buf_size - 1] = '\0';
+		return -ENOBUFS;
+	}
+
+	buf[total_read] = '\0';
+
+	return total_read;
+}
+
+/**
+ * read_file_integer() - Read an integer value from a file
+ * @path:	Path to file to read
+ * @fallback:	Default value if file can't be read
+ *
+ * Return: integer value, @fallback on failure
+ */
+intmax_t read_file_integer(const char *path, intmax_t fallback)
+{
+	ssize_t bytes_read;
+	char buf[BUFSIZ];
+	intmax_t value;
+	char *end;
+
+	bytes_read = read_file(path, buf, sizeof(buf));
+
+	if (bytes_read < 0)
+		return fallback;
+
+	if (bytes_read == 0) {
+		debug("Empty file %s", path);
+		return fallback;
+	}
+
+	errno = 0;
+	value = strtoimax(buf, &end, 10);
+	if (*end && *end != '\n') {
+		debug("Non-numeric content in %s", path);
+		return fallback;
+	}
+	if (errno) {
+		debug("Out of range value in %s: %s", path, buf);
+		return fallback;
+	}
+
+	return value;
+}
+
 #ifdef __ia64__
 /* Needed by do_clone() below: glibc doesn't export the prototype of __clone2(),
  * use the description from clone(2).
diff --git a/util.h b/util.h
index a0b2ada..c1502cc 100644
--- a/util.h
+++ b/util.h
@@ -229,6 +229,8 @@ void pidfile_write(int fd, pid_t pid);
 int __daemon(int pidfile_fd, int devnull_fd);
 int fls(unsigned long x);
 int write_file(const char *path, const char *buf);
+ssize_t read_file(const char *path, char *buf, size_t buf_size);
+intmax_t read_file_integer(const char *path, intmax_t fallback);
 int write_all_buf(int fd, const void *buf, size_t len);
 int write_remainder(int fd, const struct iovec *iov, size_t iovcnt, size_t skip);
 int read_all_buf(int fd, void *buf, size_t len);
-- 
2.51.0


^ permalink raw reply	[flat|nested] 32+ messages in thread

* [PATCH v8 3/6] tcp: Add parameter struct ctx *c to tcp_timer_ctl()
  2025-11-10  9:31 [PATCH v8 0/6] Retry SYNs for inbound connections Yumei Huang
  2025-11-10  9:31 ` [PATCH v8 1/6] tcp: Rename "retrans" to "retries" Yumei Huang
  2025-11-10  9:31 ` [PATCH v8 2/6] util: Introduce read_file() and read_file_integer() function Yumei Huang
@ 2025-11-10  9:31 ` Yumei Huang
  2025-11-10 10:35   ` David Gibson
  2025-11-10  9:31 ` [PATCH v8 4/6] tcp: Resend SYN for inbound connections Yumei Huang
                   ` (2 subsequent siblings)
  5 siblings, 1 reply; 32+ messages in thread
From: Yumei Huang @ 2025-11-10  9:31 UTC (permalink / raw)
  To: passt-dev, sbrivio; +Cc: david, yuhuang

Signed-off-by: Yumei Huang <yuhuang@redhat.com>
---
 tcp.c | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/tcp.c b/tcp.c
index ca3d742..2f49327 100644
--- a/tcp.c
+++ b/tcp.c
@@ -543,11 +543,12 @@ static int tcp_epoll_ctl(const struct ctx *c, struct tcp_tap_conn *conn)
 
 /**
  * tcp_timer_ctl() - Set timerfd based on flags/events, create timerfd if needed
+ * @c:		Execution context
  * @conn:	Connection pointer
  *
  * #syscalls timerfd_create timerfd_settime
  */
-static void tcp_timer_ctl(struct tcp_tap_conn *conn)
+static void tcp_timer_ctl(const struct ctx *c, struct tcp_tap_conn *conn)
 {
 	struct itimerspec it = { { 0 }, { 0 } };
 
@@ -631,7 +632,7 @@ void conn_flag_do(const struct ctx *c, struct tcp_tap_conn *conn,
 			 * flags and factor this into the logic below.
 			 */
 			if (flag == ACK_FROM_TAP_DUE)
-				tcp_timer_ctl(conn);
+				tcp_timer_ctl(c, conn);
 
 			return;
 		}
@@ -647,7 +648,7 @@ void conn_flag_do(const struct ctx *c, struct tcp_tap_conn *conn,
 	if (flag == ACK_FROM_TAP_DUE || flag == ACK_TO_TAP_DUE		  ||
 	    (flag == ~ACK_FROM_TAP_DUE && (conn->flags & ACK_TO_TAP_DUE)) ||
 	    (flag == ~ACK_TO_TAP_DUE   && (conn->flags & ACK_FROM_TAP_DUE)))
-		tcp_timer_ctl(conn);
+		tcp_timer_ctl(c, conn);
 }
 
 /**
@@ -702,7 +703,7 @@ void conn_event_do(const struct ctx *c, struct tcp_tap_conn *conn,
 		tcp_epoll_ctl(c, conn);
 
 	if (CONN_HAS(conn, SOCK_FIN_SENT | TAP_FIN_ACKED))
-		tcp_timer_ctl(conn);
+		tcp_timer_ctl(c, conn);
 }
 
 /**
@@ -1770,7 +1771,7 @@ static int tcp_data_from_tap(const struct ctx *c, struct tcp_tap_conn *conn,
 				   seq, conn->seq_from_tap);
 
 			tcp_send_flag(c, conn, ACK);
-			tcp_timer_ctl(conn);
+			tcp_timer_ctl(c, conn);
 
 			if (p->count == 1) {
 				tcp_tap_window_update(c, conn,
@@ -2421,7 +2422,7 @@ void tcp_timer_handler(const struct ctx *c, union epoll_ref ref)
 
 	if (conn->flags & ACK_TO_TAP_DUE) {
 		tcp_send_flag(c, conn, ACK_IF_NEEDED);
-		tcp_timer_ctl(conn);
+		tcp_timer_ctl(c, conn);
 	} else if (conn->flags & ACK_FROM_TAP_DUE) {
 		if (!(conn->events & ESTABLISHED)) {
 			flow_dbg(conn, "handshake timeout");
@@ -2443,7 +2444,7 @@ void tcp_timer_handler(const struct ctx *c, union epoll_ref ref)
 				return;
 
 			tcp_data_from_sock(c, conn);
-			tcp_timer_ctl(conn);
+			tcp_timer_ctl(c, conn);
 		}
 	} else {
 		struct itimerspec new = { { 0 }, { ACT_TIMEOUT, 0 } };
-- 
2.51.0


^ permalink raw reply	[flat|nested] 32+ messages in thread

* [PATCH v8 4/6] tcp: Resend SYN for inbound connections
  2025-11-10  9:31 [PATCH v8 0/6] Retry SYNs for inbound connections Yumei Huang
                   ` (2 preceding siblings ...)
  2025-11-10  9:31 ` [PATCH v8 3/6] tcp: Add parameter struct ctx *c to tcp_timer_ctl() Yumei Huang
@ 2025-11-10  9:31 ` Yumei Huang
  2025-11-10 10:46   ` David Gibson
  2025-11-18  0:19   ` Stefano Brivio
  2025-11-10  9:31 ` [PATCH v8 5/6] tcp: Update data retransmission timeout Yumei Huang
  2025-11-10  9:31 ` [PATCH v8 6/6] tcp: Clamp the retry timeout Yumei Huang
  5 siblings, 2 replies; 32+ messages in thread
From: Yumei Huang @ 2025-11-10  9:31 UTC (permalink / raw)
  To: passt-dev, sbrivio; +Cc: david, yuhuang

If a client connects while guest is not connected or ready yet,
resend SYN instead of just resetting connection after 10 seconds.

Use the same backoff calculation for the timeout as Linux kernel.

Link: https://bugs.passt.top/show_bug.cgi?id=153
Signed-off-by: Yumei Huang <yuhuang@redhat.com>
---
 tcp.c | 62 ++++++++++++++++++++++++++++++++++++++++++++++++++---------
 tcp.h |  4 ++++
 2 files changed, 57 insertions(+), 9 deletions(-)

diff --git a/tcp.c b/tcp.c
index 2f49327..da40a99 100644
--- a/tcp.c
+++ b/tcp.c
@@ -179,9 +179,11 @@
  *
  * Timeouts are implemented by means of timerfd timers, set based on flags:
  *
- * - SYN_TIMEOUT: if no ACK is received from tap/guest during handshake (flag
- *   ACK_FROM_TAP_DUE without ESTABLISHED event) within this time, reset the
- *   connection
+ * - SYN_TIMEOUT_INIT: if no SYN,ACK is received from tap/guest during
+ *   handshake (flag ACK_FROM_TAP_DUE without ESTABLISHED event) within
+ *   this time, resend SYN. It's the starting timeout for the first SYN
+ *   retry. Retry for TCP_MAX_RETRIES or (syn_retries + syn_linear_timeouts)
+ *   times, reset the connection
  *
  * - ACK_TIMEOUT: if no ACK segment was received from tap/guest, after sending
  *   data (flag ACK_FROM_TAP_DUE with ESTABLISHED event), re-send data from the
@@ -340,7 +342,7 @@ enum {
 #define WINDOW_DEFAULT			14600		/* RFC 6928 */
 
 #define ACK_INTERVAL			10		/* ms */
-#define SYN_TIMEOUT			10		/* s */
+#define SYN_TIMEOUT_INIT		1		/* s, RFC 6928 */
 #define ACK_TIMEOUT			2
 #define FIN_TIMEOUT			60
 #define ACT_TIMEOUT			7200
@@ -365,6 +367,13 @@ uint8_t tcp_migrate_rcv_queue		[TCP_MIGRATE_RCV_QUEUE_MAX];
 
 #define TCP_MIGRATE_RESTORE_CHUNK_MIN	1024 /* Try smaller when above this */
 
+#define SYN_RETRIES		"/proc/sys/net/ipv4/tcp_syn_retries"
+#define SYN_LINEAR_TIMEOUTS	"/proc/sys/net/ipv4/tcp_syn_linear_timeouts"
+
+#define SYN_RETRIES_DEFAULT		6
+#define SYN_LINEAR_TIMEOUTS_DEFAULT	4
+#define MAX_SYNCNT			127 /* derived from kernel's limit */
+
 /* "Extended" data (not stored in the flow table) for TCP flow migration */
 static struct tcp_tap_transfer_ext migrate_ext[FLOW_MAX];
 
@@ -585,10 +594,13 @@ static void tcp_timer_ctl(const struct ctx *c, struct tcp_tap_conn *conn)
 	if (conn->flags & ACK_TO_TAP_DUE) {
 		it.it_value.tv_nsec = (long)ACK_INTERVAL * 1000 * 1000;
 	} else if (conn->flags & ACK_FROM_TAP_DUE) {
-		if (!(conn->events & ESTABLISHED))
-			it.it_value.tv_sec = SYN_TIMEOUT;
-		else
+		if (!(conn->events & ESTABLISHED)) {
+			int exp = conn->retries - c->tcp.syn_linear_timeouts;
+			it.it_value.tv_sec = SYN_TIMEOUT_INIT << MAX(exp, 0);
+		}
+		else {
 			it.it_value.tv_sec = ACK_TIMEOUT;
+		}
 	} else if (CONN_HAS(conn, SOCK_FIN_SENT | TAP_FIN_ACKED)) {
 		it.it_value.tv_sec = FIN_TIMEOUT;
 	} else {
@@ -2425,8 +2437,18 @@ void tcp_timer_handler(const struct ctx *c, union epoll_ref ref)
 		tcp_timer_ctl(c, conn);
 	} else if (conn->flags & ACK_FROM_TAP_DUE) {
 		if (!(conn->events & ESTABLISHED)) {
-			flow_dbg(conn, "handshake timeout");
-			tcp_rst(c, conn);
+			int max;
+			max = c->tcp.syn_retries + c->tcp.syn_linear_timeouts;
+			max = MIN(TCP_MAX_RETRIES, max);
+			if (conn->retries >= max) {
+				flow_dbg(conn, "handshake timeout");
+				tcp_rst(c, conn);
+			} else {
+				flow_trace(conn, "SYN timeout, retry");
+				tcp_send_flag(c, conn, SYN);
+				conn->retries++;
+				tcp_timer_ctl(c, conn);
+			}
 		} else if (CONN_HAS(conn, SOCK_FIN_SENT | TAP_FIN_ACKED)) {
 			flow_dbg(conn, "FIN timeout");
 			tcp_rst(c, conn);
@@ -2782,6 +2804,26 @@ static socklen_t tcp_probe_tcp_info(void)
 	return sl;
 }
 
+/**
+ * tcp_get_rto_params() - Get host kernel RTO parameters
+ * @c:		Execution context
+ */
+void tcp_get_rto_params(struct ctx *c)
+{
+	intmax_t v;
+
+	v = read_file_integer(SYN_RETRIES, SYN_RETRIES_DEFAULT);
+	c->tcp.syn_retries = MIN(v, MAX_SYNCNT);
+
+	v = read_file_integer(SYN_LINEAR_TIMEOUTS, SYN_LINEAR_TIMEOUTS_DEFAULT);
+	c->tcp.syn_linear_timeouts = MIN(v, MAX_SYNCNT);
+
+	debug("Read sysctl values syn_retries: %"PRIu8
+	      ", syn_linear_timeouts: %"PRIu8,
+	      c->tcp.syn_retries,
+	      c->tcp.syn_linear_timeouts);
+}
+
 /**
  * tcp_init() - Get initial sequence, hash secret, initialise per-socket data
  * @c:		Execution context
@@ -2792,6 +2834,8 @@ int tcp_init(struct ctx *c)
 {
 	ASSERT(!c->no_tcp);
 
+	tcp_get_rto_params(c);
+
 	tcp_sock_iov_init(c);
 
 	memset(init_sock_pool4,		0xff,	sizeof(init_sock_pool4));
diff --git a/tcp.h b/tcp.h
index 0082386..37d7758 100644
--- a/tcp.h
+++ b/tcp.h
@@ -60,12 +60,16 @@ union tcp_listen_epoll_ref {
  * @fwd_out:		Port forwarding configuration for outbound packets
  * @timer_run:		Timestamp of most recent timer run
  * @pipe_size:		Size of pipes for spliced connections
+ * @syn_retries:	SYN retries using exponential backoff timeout
+ * @syn_linear_timeouts: SYN retries before using exponential backoff timeout
  */
 struct tcp_ctx {
 	struct fwd_ports fwd_in;
 	struct fwd_ports fwd_out;
 	struct timespec timer_run;
 	size_t pipe_size;
+	uint8_t syn_retries;
+	uint8_t syn_linear_timeouts;
 };
 
 #endif /* TCP_H */
-- 
2.51.0


^ permalink raw reply	[flat|nested] 32+ messages in thread

* [PATCH v8 5/6] tcp: Update data retransmission timeout
  2025-11-10  9:31 [PATCH v8 0/6] Retry SYNs for inbound connections Yumei Huang
                   ` (3 preceding siblings ...)
  2025-11-10  9:31 ` [PATCH v8 4/6] tcp: Resend SYN for inbound connections Yumei Huang
@ 2025-11-10  9:31 ` Yumei Huang
  2025-11-10  9:31 ` [PATCH v8 6/6] tcp: Clamp the retry timeout Yumei Huang
  5 siblings, 0 replies; 32+ messages in thread
From: Yumei Huang @ 2025-11-10  9:31 UTC (permalink / raw)
  To: passt-dev, sbrivio; +Cc: david, yuhuang

Use an exponential backoff timeout for data retransmission according
to RFC 2988 and RFC 6298. Set the initial RTO to one second as discussed
in Appendix A of RFC 6298.

Also combine the macros defining the initial RTO for both SYN and ACK.

Signed-off-by: Yumei Huang <yuhuang@redhat.com>
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
---
 tcp.c | 31 ++++++++++++-------------------
 1 file changed, 12 insertions(+), 19 deletions(-)

diff --git a/tcp.c b/tcp.c
index da40a99..ee111e0 100644
--- a/tcp.c
+++ b/tcp.c
@@ -179,16 +179,13 @@
  *
  * Timeouts are implemented by means of timerfd timers, set based on flags:
  *
- * - SYN_TIMEOUT_INIT: if no SYN,ACK is received from tap/guest during
- *   handshake (flag ACK_FROM_TAP_DUE without ESTABLISHED event) within
- *   this time, resend SYN. It's the starting timeout for the first SYN
- *   retry. Retry for TCP_MAX_RETRIES or (syn_retries + syn_linear_timeouts)
- *   times, reset the connection
- *
- * - ACK_TIMEOUT: if no ACK segment was received from tap/guest, after sending
- *   data (flag ACK_FROM_TAP_DUE with ESTABLISHED event), re-send data from the
- *   socket and reset sequence to what was acknowledged. If this persists for
- *   more than TCP_MAX_RETRIES times in a row, reset the connection
+ * - RTO_INIT: if no ACK segment was received from tap/guest, either during
+ *   handshake (flag ACK_FROM_TAP_DUE without ESTABLISHED event) or after
+ *   sending data (flag ACK_FROM_TAP_DUE with ESTABLISHED event), re-send data
+ *   from the socket and reset sequence to what was acknowledged. This is the
+ *   timeout for the first retry, in seconds. Retry for TCP_MAX_RETRIES times
+ *   for established connections, or (syn_retries + syn_linear_timeouts) times
+ *   during the handshake, reset the connection
  *
  * - FIN_TIMEOUT: if a FIN segment was sent to tap/guest (flag ACK_FROM_TAP_DUE
  *   with TAP_FIN_SENT event), and no ACK is received within this time, reset
@@ -342,8 +339,7 @@ enum {
 #define WINDOW_DEFAULT			14600		/* RFC 6928 */
 
 #define ACK_INTERVAL			10		/* ms */
-#define SYN_TIMEOUT_INIT		1		/* s, RFC 6928 */
-#define ACK_TIMEOUT			2
+#define RTO_INIT			1		/* s, RFC 6298 */
 #define FIN_TIMEOUT			60
 #define ACT_TIMEOUT			7200
 
@@ -594,13 +590,10 @@ static void tcp_timer_ctl(const struct ctx *c, struct tcp_tap_conn *conn)
 	if (conn->flags & ACK_TO_TAP_DUE) {
 		it.it_value.tv_nsec = (long)ACK_INTERVAL * 1000 * 1000;
 	} else if (conn->flags & ACK_FROM_TAP_DUE) {
-		if (!(conn->events & ESTABLISHED)) {
-			int exp = conn->retries - c->tcp.syn_linear_timeouts;
-			it.it_value.tv_sec = SYN_TIMEOUT_INIT << MAX(exp, 0);
-		}
-		else {
-			it.it_value.tv_sec = ACK_TIMEOUT;
-		}
+		int exp = conn->retries;
+		if (!(conn->events & ESTABLISHED))
+			exp -= c->tcp.syn_linear_timeouts;
+		it.it_value.tv_sec = RTO_INIT << MAX(exp, 0);
 	} else if (CONN_HAS(conn, SOCK_FIN_SENT | TAP_FIN_ACKED)) {
 		it.it_value.tv_sec = FIN_TIMEOUT;
 	} else {
-- 
2.51.0


^ permalink raw reply	[flat|nested] 32+ messages in thread

* [PATCH v8 6/6] tcp: Clamp the retry timeout
  2025-11-10  9:31 [PATCH v8 0/6] Retry SYNs for inbound connections Yumei Huang
                   ` (4 preceding siblings ...)
  2025-11-10  9:31 ` [PATCH v8 5/6] tcp: Update data retransmission timeout Yumei Huang
@ 2025-11-10  9:31 ` Yumei Huang
  2025-11-10 10:56   ` David Gibson
  5 siblings, 1 reply; 32+ messages in thread
From: Yumei Huang @ 2025-11-10  9:31 UTC (permalink / raw)
  To: passt-dev, sbrivio; +Cc: david, yuhuang

Clamp the TCP retry timeout as Linux kernel does. If a retry occurs
during the handshake and the RTO is below 3 seconds, re-initialise
it to 3 seconds for data retransmissions according to RFC 6298.

Suggested-by: Stefano Brivio <sbrivio@redhat.com>
Signed-off-by: Yumei Huang <yuhuang@redhat.com>
---
 tcp.c      | 25 ++++++++++++++++++++-----
 tcp.h      |  2 ++
 tcp_conn.h |  1 +
 3 files changed, 23 insertions(+), 5 deletions(-)

diff --git a/tcp.c b/tcp.c
index ee111e0..b015658 100644
--- a/tcp.c
+++ b/tcp.c
@@ -187,6 +187,9 @@
  *   for established connections, or (syn_retries + syn_linear_timeouts) times
  *   during the handshake, reset the connection
  *
+ * - RTO_INIT_ACK: if the RTO is less than this, re-initialise RTO to this for
+ *   data retransmissions
+ *
  * - FIN_TIMEOUT: if a FIN segment was sent to tap/guest (flag ACK_FROM_TAP_DUE
  *   with TAP_FIN_SENT event), and no ACK is received within this time, reset
  *   the connection
@@ -340,6 +343,7 @@ enum {
 
 #define ACK_INTERVAL			10		/* ms */
 #define RTO_INIT			1		/* s, RFC 6298 */
+#define RTO_INIT_ACK			3		/* s, RFC 6298 */
 #define FIN_TIMEOUT			60
 #define ACT_TIMEOUT			7200
 
@@ -365,9 +369,11 @@ uint8_t tcp_migrate_rcv_queue		[TCP_MIGRATE_RCV_QUEUE_MAX];
 
 #define SYN_RETRIES		"/proc/sys/net/ipv4/tcp_syn_retries"
 #define SYN_LINEAR_TIMEOUTS	"/proc/sys/net/ipv4/tcp_syn_linear_timeouts"
+#define RTO_MAX_MS		"/proc/sys/net/ipv4/tcp_rto_max_ms"
 
 #define SYN_RETRIES_DEFAULT		6
 #define SYN_LINEAR_TIMEOUTS_DEFAULT	4
+#define RTO_MAX_MS_DEFAULT		120000
 #define MAX_SYNCNT			127 /* derived from kernel's limit */
 
 /* "Extended" data (not stored in the flow table) for TCP flow migration */
@@ -392,7 +398,7 @@ static const char *tcp_state_str[] __attribute((__unused__)) = {
 
 static const char *tcp_flag_str[] __attribute((__unused__)) = {
 	"STALLED", "LOCAL", "ACTIVE_CLOSE", "ACK_TO_TAP_DUE",
-	"ACK_FROM_TAP_DUE", "ACK_FROM_TAP_BLOCKS",
+	"ACK_FROM_TAP_DUE", "ACK_FROM_TAP_BLOCKS", "SYN_RETRIED",
 };
 
 /* Listening sockets, used for automatic port forwarding in pasta mode only */
@@ -590,10 +596,13 @@ static void tcp_timer_ctl(const struct ctx *c, struct tcp_tap_conn *conn)
 	if (conn->flags & ACK_TO_TAP_DUE) {
 		it.it_value.tv_nsec = (long)ACK_INTERVAL * 1000 * 1000;
 	} else if (conn->flags & ACK_FROM_TAP_DUE) {
-		int exp = conn->retries;
+		int exp = conn->retries, timeout = RTO_INIT;
 		if (!(conn->events & ESTABLISHED))
 			exp -= c->tcp.syn_linear_timeouts;
-		it.it_value.tv_sec = RTO_INIT << MAX(exp, 0);
+		else if (conn->flags & SYN_RETRIED)
+			timeout = MAX(timeout, RTO_INIT_ACK);
+		timeout <<= MAX(exp, 0);
+		it.it_value.tv_sec = MIN(timeout, c->tcp.rto_max);
 	} else if (CONN_HAS(conn, SOCK_FIN_SENT | TAP_FIN_ACKED)) {
 		it.it_value.tv_sec = FIN_TIMEOUT;
 	} else {
@@ -2440,6 +2449,7 @@ void tcp_timer_handler(const struct ctx *c, union epoll_ref ref)
 				flow_trace(conn, "SYN timeout, retry");
 				tcp_send_flag(c, conn, SYN);
 				conn->retries++;
+				conn_flag(c, conn, SYN_RETRIED);
 				tcp_timer_ctl(c, conn);
 			}
 		} else if (CONN_HAS(conn, SOCK_FIN_SENT | TAP_FIN_ACKED)) {
@@ -2811,10 +2821,15 @@ void tcp_get_rto_params(struct ctx *c)
 	v = read_file_integer(SYN_LINEAR_TIMEOUTS, SYN_LINEAR_TIMEOUTS_DEFAULT);
 	c->tcp.syn_linear_timeouts = MIN(v, MAX_SYNCNT);
 
+	v = read_file_integer(RTO_MAX_MS, RTO_MAX_MS_DEFAULT);
+	c->tcp.rto_max = MIN(DIV_ROUND_CLOSEST(v, 1000), INT_MAX);
+
 	debug("Read sysctl values syn_retries: %"PRIu8
-	      ", syn_linear_timeouts: %"PRIu8,
+	      ", syn_linear_timeouts: %"PRIu8
+	      ", rto_max: %d",
 	      c->tcp.syn_retries,
-	      c->tcp.syn_linear_timeouts);
+	      c->tcp.syn_linear_timeouts,
+	      c->tcp.rto_max);
 }
 
 /**
diff --git a/tcp.h b/tcp.h
index 37d7758..c4945c3 100644
--- a/tcp.h
+++ b/tcp.h
@@ -60,6 +60,7 @@ union tcp_listen_epoll_ref {
  * @fwd_out:		Port forwarding configuration for outbound packets
  * @timer_run:		Timestamp of most recent timer run
  * @pipe_size:		Size of pipes for spliced connections
+ * @rto_max:		Maximal retry timeout (in s)
  * @syn_retries:	SYN retries using exponential backoff timeout
  * @syn_linear_timeouts: SYN retries before using exponential backoff timeout
  */
@@ -68,6 +69,7 @@ struct tcp_ctx {
 	struct fwd_ports fwd_out;
 	struct timespec timer_run;
 	size_t pipe_size;
+	int rto_max;
 	uint8_t syn_retries;
 	uint8_t syn_linear_timeouts;
 };
diff --git a/tcp_conn.h b/tcp_conn.h
index 923af36..e36910c 100644
--- a/tcp_conn.h
+++ b/tcp_conn.h
@@ -77,6 +77,7 @@ struct tcp_tap_conn {
 #define ACK_TO_TAP_DUE		BIT(3)
 #define ACK_FROM_TAP_DUE	BIT(4)
 #define ACK_FROM_TAP_BLOCKS	BIT(5)
+#define SYN_RETRIED		BIT(6)
 
 #define SNDBUF_BITS		24
 	unsigned int	sndbuf		:SNDBUF_BITS;
-- 
2.51.0


^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH v8 3/6] tcp: Add parameter struct ctx *c to tcp_timer_ctl()
  2025-11-10  9:31 ` [PATCH v8 3/6] tcp: Add parameter struct ctx *c to tcp_timer_ctl() Yumei Huang
@ 2025-11-10 10:35   ` David Gibson
  2025-11-14  0:01     ` Stefano Brivio
  0 siblings, 1 reply; 32+ messages in thread
From: David Gibson @ 2025-11-10 10:35 UTC (permalink / raw)
  To: Yumei Huang; +Cc: passt-dev, sbrivio

[-- Attachment #1: Type: text/plain, Size: 3164 bytes --]

On Mon, Nov 10, 2025 at 05:31:34PM +0800, Yumei Huang wrote:

Should have a commit message explaining that it was recently removed,
and why you need it back.  Stefano might be able to add that on merge, though.

> Signed-off-by: Yumei Huang <yuhuang@redhat.com>

For the code itself,

Reviewed-by: David Gibson <david@gibson.dropbear.id.au>

> ---
>  tcp.c | 15 ++++++++-------
>  1 file changed, 8 insertions(+), 7 deletions(-)
> 
> diff --git a/tcp.c b/tcp.c
> index ca3d742..2f49327 100644
> --- a/tcp.c
> +++ b/tcp.c
> @@ -543,11 +543,12 @@ static int tcp_epoll_ctl(const struct ctx *c, struct tcp_tap_conn *conn)
>  
>  /**
>   * tcp_timer_ctl() - Set timerfd based on flags/events, create timerfd if needed
> + * @c:		Execution context
>   * @conn:	Connection pointer
>   *
>   * #syscalls timerfd_create timerfd_settime
>   */
> -static void tcp_timer_ctl(struct tcp_tap_conn *conn)
> +static void tcp_timer_ctl(const struct ctx *c, struct tcp_tap_conn *conn)
>  {
>  	struct itimerspec it = { { 0 }, { 0 } };
>  
> @@ -631,7 +632,7 @@ void conn_flag_do(const struct ctx *c, struct tcp_tap_conn *conn,
>  			 * flags and factor this into the logic below.
>  			 */
>  			if (flag == ACK_FROM_TAP_DUE)
> -				tcp_timer_ctl(conn);
> +				tcp_timer_ctl(c, conn);
>  
>  			return;
>  		}
> @@ -647,7 +648,7 @@ void conn_flag_do(const struct ctx *c, struct tcp_tap_conn *conn,
>  	if (flag == ACK_FROM_TAP_DUE || flag == ACK_TO_TAP_DUE		  ||
>  	    (flag == ~ACK_FROM_TAP_DUE && (conn->flags & ACK_TO_TAP_DUE)) ||
>  	    (flag == ~ACK_TO_TAP_DUE   && (conn->flags & ACK_FROM_TAP_DUE)))
> -		tcp_timer_ctl(conn);
> +		tcp_timer_ctl(c, conn);
>  }
>  
>  /**
> @@ -702,7 +703,7 @@ void conn_event_do(const struct ctx *c, struct tcp_tap_conn *conn,
>  		tcp_epoll_ctl(c, conn);
>  
>  	if (CONN_HAS(conn, SOCK_FIN_SENT | TAP_FIN_ACKED))
> -		tcp_timer_ctl(conn);
> +		tcp_timer_ctl(c, conn);
>  }
>  
>  /**
> @@ -1770,7 +1771,7 @@ static int tcp_data_from_tap(const struct ctx *c, struct tcp_tap_conn *conn,
>  				   seq, conn->seq_from_tap);
>  
>  			tcp_send_flag(c, conn, ACK);
> -			tcp_timer_ctl(conn);
> +			tcp_timer_ctl(c, conn);
>  
>  			if (p->count == 1) {
>  				tcp_tap_window_update(c, conn,
> @@ -2421,7 +2422,7 @@ void tcp_timer_handler(const struct ctx *c, union epoll_ref ref)
>  
>  	if (conn->flags & ACK_TO_TAP_DUE) {
>  		tcp_send_flag(c, conn, ACK_IF_NEEDED);
> -		tcp_timer_ctl(conn);
> +		tcp_timer_ctl(c, conn);
>  	} else if (conn->flags & ACK_FROM_TAP_DUE) {
>  		if (!(conn->events & ESTABLISHED)) {
>  			flow_dbg(conn, "handshake timeout");
> @@ -2443,7 +2444,7 @@ void tcp_timer_handler(const struct ctx *c, union epoll_ref ref)
>  				return;
>  
>  			tcp_data_from_sock(c, conn);
> -			tcp_timer_ctl(conn);
> +			tcp_timer_ctl(c, conn);
>  		}
>  	} else {
>  		struct itimerspec new = { { 0 }, { ACT_TIMEOUT, 0 } };
> -- 
> 2.51.0
> 

-- 
David Gibson (he or they)	| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you, not the other way
				| around.
http://www.ozlabs.org/~dgibson

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH v8 4/6] tcp: Resend SYN for inbound connections
  2025-11-10  9:31 ` [PATCH v8 4/6] tcp: Resend SYN for inbound connections Yumei Huang
@ 2025-11-10 10:46   ` David Gibson
  2025-11-14  0:00     ` Stefano Brivio
  2025-11-18  0:19   ` Stefano Brivio
  1 sibling, 1 reply; 32+ messages in thread
From: David Gibson @ 2025-11-10 10:46 UTC (permalink / raw)
  To: Yumei Huang; +Cc: passt-dev, sbrivio

[-- Attachment #1: Type: text/plain, Size: 6350 bytes --]

On Mon, Nov 10, 2025 at 05:31:35PM +0800, Yumei Huang wrote:
> If a client connects while guest is not connected or ready yet,
> resend SYN instead of just resetting connection after 10 seconds.
> 
> Use the same backoff calculation for the timeout as Linux kernel.
> 
> Link: https://bugs.passt.top/show_bug.cgi?id=153
> Signed-off-by: Yumei Huang <yuhuang@redhat.com>

Reviewed-by: David Gibson <david@gibson.dropbear.id.au>

Though I have one small concern remaining

> ---
>  tcp.c | 62 ++++++++++++++++++++++++++++++++++++++++++++++++++---------
>  tcp.h |  4 ++++
>  2 files changed, 57 insertions(+), 9 deletions(-)
> 
> diff --git a/tcp.c b/tcp.c
> index 2f49327..da40a99 100644
> --- a/tcp.c
> +++ b/tcp.c
> @@ -179,9 +179,11 @@
>   *
>   * Timeouts are implemented by means of timerfd timers, set based on flags:
>   *
> - * - SYN_TIMEOUT: if no ACK is received from tap/guest during handshake (flag
> - *   ACK_FROM_TAP_DUE without ESTABLISHED event) within this time, reset the
> - *   connection
> + * - SYN_TIMEOUT_INIT: if no SYN,ACK is received from tap/guest during
> + *   handshake (flag ACK_FROM_TAP_DUE without ESTABLISHED event) within
> + *   this time, resend SYN. It's the starting timeout for the first SYN
> + *   retry. Retry for TCP_MAX_RETRIES or (syn_retries + syn_linear_timeouts)
> + *   times, reset the connection
>   *
>   * - ACK_TIMEOUT: if no ACK segment was received from tap/guest, after sending
>   *   data (flag ACK_FROM_TAP_DUE with ESTABLISHED event), re-send data from the
> @@ -340,7 +342,7 @@ enum {
>  #define WINDOW_DEFAULT			14600		/* RFC 6928 */
>  
>  #define ACK_INTERVAL			10		/* ms */
> -#define SYN_TIMEOUT			10		/* s */
> +#define SYN_TIMEOUT_INIT		1		/* s, RFC 6928 */
>  #define ACK_TIMEOUT			2
>  #define FIN_TIMEOUT			60
>  #define ACT_TIMEOUT			7200
> @@ -365,6 +367,13 @@ uint8_t tcp_migrate_rcv_queue		[TCP_MIGRATE_RCV_QUEUE_MAX];
>  
>  #define TCP_MIGRATE_RESTORE_CHUNK_MIN	1024 /* Try smaller when above this */
>  
> +#define SYN_RETRIES		"/proc/sys/net/ipv4/tcp_syn_retries"
> +#define SYN_LINEAR_TIMEOUTS	"/proc/sys/net/ipv4/tcp_syn_linear_timeouts"
> +
> +#define SYN_RETRIES_DEFAULT		6
> +#define SYN_LINEAR_TIMEOUTS_DEFAULT	4
> +#define MAX_SYNCNT			127 /* derived from kernel's limit */
> +
>  /* "Extended" data (not stored in the flow table) for TCP flow migration */
>  static struct tcp_tap_transfer_ext migrate_ext[FLOW_MAX];
>  
> @@ -585,10 +594,13 @@ static void tcp_timer_ctl(const struct ctx *c, struct tcp_tap_conn *conn)
>  	if (conn->flags & ACK_TO_TAP_DUE) {
>  		it.it_value.tv_nsec = (long)ACK_INTERVAL * 1000 * 1000;
>  	} else if (conn->flags & ACK_FROM_TAP_DUE) {
> -		if (!(conn->events & ESTABLISHED))
> -			it.it_value.tv_sec = SYN_TIMEOUT;
> -		else
> +		if (!(conn->events & ESTABLISHED)) {
> +			int exp = conn->retries - c->tcp.syn_linear_timeouts;

As discussed in the thread on the last version, the subtraction will
be done unsigned, so the final result depends on behaviour of casting
a "negative" (that is, close to max value) unsigned value into a
signed variable.  I'm pretty sure that will do the right thing in
practice, but I don't believe that behaviour is guaranteed by the C
standard.

This probably isn't worth a respin, though.

> +			it.it_value.tv_sec = SYN_TIMEOUT_INIT << MAX(exp, 0);
> +		}
> +		else {
>  			it.it_value.tv_sec = ACK_TIMEOUT;
> +		}
>  	} else if (CONN_HAS(conn, SOCK_FIN_SENT | TAP_FIN_ACKED)) {
>  		it.it_value.tv_sec = FIN_TIMEOUT;
>  	} else {
> @@ -2425,8 +2437,18 @@ void tcp_timer_handler(const struct ctx *c, union epoll_ref ref)
>  		tcp_timer_ctl(c, conn);
>  	} else if (conn->flags & ACK_FROM_TAP_DUE) {
>  		if (!(conn->events & ESTABLISHED)) {
> -			flow_dbg(conn, "handshake timeout");
> -			tcp_rst(c, conn);
> +			int max;
> +			max = c->tcp.syn_retries + c->tcp.syn_linear_timeouts;
> +			max = MIN(TCP_MAX_RETRIES, max);
> +			if (conn->retries >= max) {
> +				flow_dbg(conn, "handshake timeout");
> +				tcp_rst(c, conn);
> +			} else {
> +				flow_trace(conn, "SYN timeout, retry");
> +				tcp_send_flag(c, conn, SYN);
> +				conn->retries++;
> +				tcp_timer_ctl(c, conn);
> +			}
>  		} else if (CONN_HAS(conn, SOCK_FIN_SENT | TAP_FIN_ACKED)) {
>  			flow_dbg(conn, "FIN timeout");
>  			tcp_rst(c, conn);
> @@ -2782,6 +2804,26 @@ static socklen_t tcp_probe_tcp_info(void)
>  	return sl;
>  }
>  
> +/**
> + * tcp_get_rto_params() - Get host kernel RTO parameters
> + * @c:		Execution context
> + */
> +void tcp_get_rto_params(struct ctx *c)
> +{
> +	intmax_t v;
> +
> +	v = read_file_integer(SYN_RETRIES, SYN_RETRIES_DEFAULT);
> +	c->tcp.syn_retries = MIN(v, MAX_SYNCNT);
> +
> +	v = read_file_integer(SYN_LINEAR_TIMEOUTS, SYN_LINEAR_TIMEOUTS_DEFAULT);
> +	c->tcp.syn_linear_timeouts = MIN(v, MAX_SYNCNT);
> +
> +	debug("Read sysctl values syn_retries: %"PRIu8
> +	      ", syn_linear_timeouts: %"PRIu8,
> +	      c->tcp.syn_retries,
> +	      c->tcp.syn_linear_timeouts);
> +}
> +
>  /**
>   * tcp_init() - Get initial sequence, hash secret, initialise per-socket data
>   * @c:		Execution context
> @@ -2792,6 +2834,8 @@ int tcp_init(struct ctx *c)
>  {
>  	ASSERT(!c->no_tcp);
>  
> +	tcp_get_rto_params(c);
> +
>  	tcp_sock_iov_init(c);
>  
>  	memset(init_sock_pool4,		0xff,	sizeof(init_sock_pool4));
> diff --git a/tcp.h b/tcp.h
> index 0082386..37d7758 100644
> --- a/tcp.h
> +++ b/tcp.h
> @@ -60,12 +60,16 @@ union tcp_listen_epoll_ref {
>   * @fwd_out:		Port forwarding configuration for outbound packets
>   * @timer_run:		Timestamp of most recent timer run
>   * @pipe_size:		Size of pipes for spliced connections
> + * @syn_retries:	SYN retries using exponential backoff timeout
> + * @syn_linear_timeouts: SYN retries before using exponential backoff timeout
>   */
>  struct tcp_ctx {
>  	struct fwd_ports fwd_in;
>  	struct fwd_ports fwd_out;
>  	struct timespec timer_run;
>  	size_t pipe_size;
> +	uint8_t syn_retries;
> +	uint8_t syn_linear_timeouts;
>  };
>  
>  #endif /* TCP_H */
> -- 
> 2.51.0
> 

-- 
David Gibson (he or they)	| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you, not the other way
				| around.
http://www.ozlabs.org/~dgibson

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH v8 6/6] tcp: Clamp the retry timeout
  2025-11-10  9:31 ` [PATCH v8 6/6] tcp: Clamp the retry timeout Yumei Huang
@ 2025-11-10 10:56   ` David Gibson
  2025-11-14  0:01     ` Stefano Brivio
  0 siblings, 1 reply; 32+ messages in thread
From: David Gibson @ 2025-11-10 10:56 UTC (permalink / raw)
  To: Yumei Huang; +Cc: passt-dev, sbrivio

[-- Attachment #1: Type: text/plain, Size: 5786 bytes --]

On Mon, Nov 10, 2025 at 05:31:37PM +0800, Yumei Huang wrote:
> Clamp the TCP retry timeout as Linux kernel does. If a retry occurs
> during the handshake and the RTO is below 3 seconds, re-initialise
> it to 3 seconds for data retransmissions according to RFC 6298.
> 
> Suggested-by: Stefano Brivio <sbrivio@redhat.com>
> Signed-off-by: Yumei Huang <yuhuang@redhat.com>

Looks correct, so

Reviewed-by: David Gibson <david@gibson.dropbear.id.au>

A few possible improvements (mostly comments/names) below.

> ---
>  tcp.c      | 25 ++++++++++++++++++++-----
>  tcp.h      |  2 ++
>  tcp_conn.h |  1 +
>  3 files changed, 23 insertions(+), 5 deletions(-)
> 
> diff --git a/tcp.c b/tcp.c
> index ee111e0..b015658 100644
> --- a/tcp.c
> +++ b/tcp.c
> @@ -187,6 +187,9 @@
>   *   for established connections, or (syn_retries + syn_linear_timeouts) times
>   *   during the handshake, reset the connection
>   *
> + * - RTO_INIT_ACK: if the RTO is less than this, re-initialise RTO to this for
> + *   data retransmissions
> + *

Now that this is conditional on SYN being retried, the description
doesn't look entirely accurate any more.

>   * - FIN_TIMEOUT: if a FIN segment was sent to tap/guest (flag ACK_FROM_TAP_DUE
>   *   with TAP_FIN_SENT event), and no ACK is received within this time, reset
>   *   the connection
> @@ -340,6 +343,7 @@ enum {
>  
>  #define ACK_INTERVAL			10		/* ms */
>  #define RTO_INIT			1		/* s, RFC 6298 */
> +#define RTO_INIT_ACK			3		/* s, RFC 6298 */

The relevance of "_ACK" in the name is not obvious to me.

>  #define FIN_TIMEOUT			60
>  #define ACT_TIMEOUT			7200
>  
-> @@ -365,9 +369,11 @@ uint8_t tcp_migrate_rcv_queue		[TCP_MIGRATE_RCV_QUEUE_MAX];
>  
>  #define SYN_RETRIES		"/proc/sys/net/ipv4/tcp_syn_retries"
>  #define SYN_LINEAR_TIMEOUTS	"/proc/sys/net/ipv4/tcp_syn_linear_timeouts"
> +#define RTO_MAX_MS		"/proc/sys/net/ipv4/tcp_rto_max_ms"
>  
>  #define SYN_RETRIES_DEFAULT		6
>  #define SYN_LINEAR_TIMEOUTS_DEFAULT	4
> +#define RTO_MAX_MS_DEFAULT		120000
>  #define MAX_SYNCNT			127 /* derived from kernel's limit */
>  
>  /* "Extended" data (not stored in the flow table) for TCP flow migration */
> @@ -392,7 +398,7 @@ static const char *tcp_state_str[] __attribute((__unused__)) = {
>  
>  static const char *tcp_flag_str[] __attribute((__unused__)) = {
>  	"STALLED", "LOCAL", "ACTIVE_CLOSE", "ACK_TO_TAP_DUE",
> -	"ACK_FROM_TAP_DUE", "ACK_FROM_TAP_BLOCKS",
> +	"ACK_FROM_TAP_DUE", "ACK_FROM_TAP_BLOCKS", "SYN_RETRIED",
>  };
>  
>  /* Listening sockets, used for automatic port forwarding in pasta mode only */
> @@ -590,10 +596,13 @@ static void tcp_timer_ctl(const struct ctx *c, struct tcp_tap_conn *conn)
>  	if (conn->flags & ACK_TO_TAP_DUE) {
>  		it.it_value.tv_nsec = (long)ACK_INTERVAL * 1000 * 1000;
>  	} else if (conn->flags & ACK_FROM_TAP_DUE) {
> -		int exp = conn->retries;
> +		int exp = conn->retries, timeout = RTO_INIT;
>  		if (!(conn->events & ESTABLISHED))
>  			exp -= c->tcp.syn_linear_timeouts;
> -		it.it_value.tv_sec = RTO_INIT << MAX(exp, 0);
> +		else if (conn->flags & SYN_RETRIED)
> +			timeout = MAX(timeout, RTO_INIT_ACK);
> +		timeout <<= MAX(exp, 0);
> +		it.it_value.tv_sec = MIN(timeout, c->tcp.rto_max);
>  	} else if (CONN_HAS(conn, SOCK_FIN_SENT | TAP_FIN_ACKED)) {
>  		it.it_value.tv_sec = FIN_TIMEOUT;
>  	} else {
> @@ -2440,6 +2449,7 @@ void tcp_timer_handler(const struct ctx *c, union epoll_ref ref)
>  				flow_trace(conn, "SYN timeout, retry");
>  				tcp_send_flag(c, conn, SYN);
>  				conn->retries++;
> +				conn_flag(c, conn, SYN_RETRIED);
>  				tcp_timer_ctl(c, conn);
>  			}
>  		} else if (CONN_HAS(conn, SOCK_FIN_SENT | TAP_FIN_ACKED)) {
> @@ -2811,10 +2821,15 @@ void tcp_get_rto_params(struct ctx *c)
>  	v = read_file_integer(SYN_LINEAR_TIMEOUTS, SYN_LINEAR_TIMEOUTS_DEFAULT);
>  	c->tcp.syn_linear_timeouts = MIN(v, MAX_SYNCNT);
>  
> +	v = read_file_integer(RTO_MAX_MS, RTO_MAX_MS_DEFAULT);
> +	c->tcp.rto_max = MIN(DIV_ROUND_CLOSEST(v, 1000), INT_MAX);

Possibly we should verify this is => RTO_INIT.

> +
>  	debug("Read sysctl values syn_retries: %"PRIu8
> -	      ", syn_linear_timeouts: %"PRIu8,
> +	      ", syn_linear_timeouts: %"PRIu8
> +	      ", rto_max: %d",
>  	      c->tcp.syn_retries,
> -	      c->tcp.syn_linear_timeouts);
> +	      c->tcp.syn_linear_timeouts,
> +	      c->tcp.rto_max);
>  }
>  
>  /**
> diff --git a/tcp.h b/tcp.h
> index 37d7758..c4945c3 100644
> --- a/tcp.h
> +++ b/tcp.h
> @@ -60,6 +60,7 @@ union tcp_listen_epoll_ref {
>   * @fwd_out:		Port forwarding configuration for outbound packets
>   * @timer_run:		Timestamp of most recent timer run
>   * @pipe_size:		Size of pipes for spliced connections
> + * @rto_max:		Maximal retry timeout (in s)
>   * @syn_retries:	SYN retries using exponential backoff timeout
>   * @syn_linear_timeouts: SYN retries before using exponential backoff timeout
>   */
> @@ -68,6 +69,7 @@ struct tcp_ctx {
>  	struct fwd_ports fwd_out;
>  	struct timespec timer_run;
>  	size_t pipe_size;
> +	int rto_max;
>  	uint8_t syn_retries;
>  	uint8_t syn_linear_timeouts;
>  };
> diff --git a/tcp_conn.h b/tcp_conn.h
> index 923af36..e36910c 100644
> --- a/tcp_conn.h
> +++ b/tcp_conn.h
> @@ -77,6 +77,7 @@ struct tcp_tap_conn {
>  #define ACK_TO_TAP_DUE		BIT(3)
>  #define ACK_FROM_TAP_DUE	BIT(4)
>  #define ACK_FROM_TAP_BLOCKS	BIT(5)
> +#define SYN_RETRIED		BIT(6)
>  
>  #define SNDBUF_BITS		24
>  	unsigned int	sndbuf		:SNDBUF_BITS;
> -- 
> 2.51.0
> 

-- 
David Gibson (he or they)	| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you, not the other way
				| around.
http://www.ozlabs.org/~dgibson

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH v8 4/6] tcp: Resend SYN for inbound connections
  2025-11-10 10:46   ` David Gibson
@ 2025-11-14  0:00     ` Stefano Brivio
  2025-11-14  0:31       ` David Gibson
  0 siblings, 1 reply; 32+ messages in thread
From: Stefano Brivio @ 2025-11-14  0:00 UTC (permalink / raw)
  To: David Gibson; +Cc: Yumei Huang, passt-dev

On Mon, 10 Nov 2025 21:46:55 +1100
David Gibson <david@gibson.dropbear.id.au> wrote:

> On Mon, Nov 10, 2025 at 05:31:35PM +0800, Yumei Huang wrote:
> > If a client connects while guest is not connected or ready yet,
> > resend SYN instead of just resetting connection after 10 seconds.
> > 
> > Use the same backoff calculation for the timeout as Linux kernel.
> > 
> > Link: https://bugs.passt.top/show_bug.cgi?id=153
> > Signed-off-by: Yumei Huang <yuhuang@redhat.com>  
> 
> Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
> 
> Though I have one small concern remaining
> 
> > ---
> >  tcp.c | 62 ++++++++++++++++++++++++++++++++++++++++++++++++++---------
> >  tcp.h |  4 ++++
> >  2 files changed, 57 insertions(+), 9 deletions(-)
> > 
> > diff --git a/tcp.c b/tcp.c
> > index 2f49327..da40a99 100644
> > --- a/tcp.c
> > +++ b/tcp.c
> > @@ -179,9 +179,11 @@
> >   *
> >   * Timeouts are implemented by means of timerfd timers, set based on flags:
> >   *
> > - * - SYN_TIMEOUT: if no ACK is received from tap/guest during handshake (flag
> > - *   ACK_FROM_TAP_DUE without ESTABLISHED event) within this time, reset the
> > - *   connection
> > + * - SYN_TIMEOUT_INIT: if no SYN,ACK is received from tap/guest during
> > + *   handshake (flag ACK_FROM_TAP_DUE without ESTABLISHED event) within
> > + *   this time, resend SYN. It's the starting timeout for the first SYN
> > + *   retry. Retry for TCP_MAX_RETRIES or (syn_retries + syn_linear_timeouts)
> > + *   times, reset the connection
> >   *
> >   * - ACK_TIMEOUT: if no ACK segment was received from tap/guest, after sending
> >   *   data (flag ACK_FROM_TAP_DUE with ESTABLISHED event), re-send data from the
> > @@ -340,7 +342,7 @@ enum {
> >  #define WINDOW_DEFAULT			14600		/* RFC 6928 */
> >  
> >  #define ACK_INTERVAL			10		/* ms */
> > -#define SYN_TIMEOUT			10		/* s */
> > +#define SYN_TIMEOUT_INIT		1		/* s, RFC 6928 */
> >  #define ACK_TIMEOUT			2
> >  #define FIN_TIMEOUT			60
> >  #define ACT_TIMEOUT			7200
> > @@ -365,6 +367,13 @@ uint8_t tcp_migrate_rcv_queue		[TCP_MIGRATE_RCV_QUEUE_MAX];
> >  
> >  #define TCP_MIGRATE_RESTORE_CHUNK_MIN	1024 /* Try smaller when above this */
> >  
> > +#define SYN_RETRIES		"/proc/sys/net/ipv4/tcp_syn_retries"
> > +#define SYN_LINEAR_TIMEOUTS	"/proc/sys/net/ipv4/tcp_syn_linear_timeouts"
> > +
> > +#define SYN_RETRIES_DEFAULT		6
> > +#define SYN_LINEAR_TIMEOUTS_DEFAULT	4
> > +#define MAX_SYNCNT			127 /* derived from kernel's limit */
> > +
> >  /* "Extended" data (not stored in the flow table) for TCP flow migration */
> >  static struct tcp_tap_transfer_ext migrate_ext[FLOW_MAX];
> >  
> > @@ -585,10 +594,13 @@ static void tcp_timer_ctl(const struct ctx *c, struct tcp_tap_conn *conn)
> >  	if (conn->flags & ACK_TO_TAP_DUE) {
> >  		it.it_value.tv_nsec = (long)ACK_INTERVAL * 1000 * 1000;
> >  	} else if (conn->flags & ACK_FROM_TAP_DUE) {
> > -		if (!(conn->events & ESTABLISHED))
> > -			it.it_value.tv_sec = SYN_TIMEOUT;
> > -		else
> > +		if (!(conn->events & ESTABLISHED)) {
> > +			int exp = conn->retries - c->tcp.syn_linear_timeouts;  
> 
> As discussed in the thread on the last version, the subtraction will
> be done unsigned, so the final result depends on behaviour of casting
> a "negative" (that is, close to max value) unsigned value into a
> signed variable.  I'm pretty sure that will do the right thing in
> practice, but I don't believe that behaviour is guaranteed by the C
> standard.

Surprisingly to me, C99 6.2.6.2 "Integer types" admits three
interpretations of the sign bit for signed types, quoting:

  — the corresponding value with sign bit 0 is negated (sign and
    magnitude);
  — the sign bit has the value −(2N ) (two’s complement);
  — the sign bit has the value −(2N − 1) (ones’ complement).

and this, together with the conversion rule from 6.3.1.3 "Signed
and unsigned integers":

  Otherwise, the new type is signed and the value cannot be represented
  in it; either the result is implementation-defined or an
  implementation-defined signal is raised.

seems to confirm your interpretation, even though I'm really having
a hard time convincing myself that we should actually do stuff like:

			int exp;

			exp = (int)conn->retries - c->tcp.syn_linear_timeouts;

and I still have the suspicion we're missing something. Is this syntax
what you're suggesting, by the way?

> This probably isn't worth a respin, though.

Maybe not but:

> > +			it.it_value.tv_sec = SYN_TIMEOUT_INIT << MAX(exp, 0);
> > +		}
> > +		else {

this is another bit I would need to change. The coding style dictates:

		if (x) {
			...
		} else {
			...
		}

*not*:

		if (x) {
			...
		}
		else {
			...
		}

...on the other hand, it goes away in 5/6, so I don't care
particularly.

It still matters a very tiny bit because if we need to revert the
commit resulting from 5/6 for any reason we'll end up with broken
coding style. But that's not a realistic scenario and it's very minor
anyway.

> >  			it.it_value.tv_sec = ACK_TIMEOUT;
> > +		}
> >  	} else if (CONN_HAS(conn, SOCK_FIN_SENT | TAP_FIN_ACKED)) {
> >  		it.it_value.tv_sec = FIN_TIMEOUT;
> >  	} else {
> > @@ -2425,8 +2437,18 @@ void tcp_timer_handler(const struct ctx *c, union epoll_ref ref)
> >  		tcp_timer_ctl(c, conn);
> >  	} else if (conn->flags & ACK_FROM_TAP_DUE) {
> >  		if (!(conn->events & ESTABLISHED)) {
> > -			flow_dbg(conn, "handshake timeout");
> > -			tcp_rst(c, conn);
> > +			int max;
> > +			max = c->tcp.syn_retries + c->tcp.syn_linear_timeouts;
> > +			max = MIN(TCP_MAX_RETRIES, max);
> > +			if (conn->retries >= max) {
> > +				flow_dbg(conn, "handshake timeout");
> > +				tcp_rst(c, conn);
> > +			} else {
> > +				flow_trace(conn, "SYN timeout, retry");
> > +				tcp_send_flag(c, conn, SYN);
> > +				conn->retries++;
> > +				tcp_timer_ctl(c, conn);
> > +			}
> >  		} else if (CONN_HAS(conn, SOCK_FIN_SENT | TAP_FIN_ACKED)) {
> >  			flow_dbg(conn, "FIN timeout");
> >  			tcp_rst(c, conn);
> > @@ -2782,6 +2804,26 @@ static socklen_t tcp_probe_tcp_info(void)
> >  	return sl;
> >  }
> >  
> > +/**
> > + * tcp_get_rto_params() - Get host kernel RTO parameters
> > + * @c:		Execution context
> > + */
> > +void tcp_get_rto_params(struct ctx *c)
> > +{
> > +	intmax_t v;
> > +
> > +	v = read_file_integer(SYN_RETRIES, SYN_RETRIES_DEFAULT);
> > +	c->tcp.syn_retries = MIN(v, MAX_SYNCNT);
> > +
> > +	v = read_file_integer(SYN_LINEAR_TIMEOUTS, SYN_LINEAR_TIMEOUTS_DEFAULT);
> > +	c->tcp.syn_linear_timeouts = MIN(v, MAX_SYNCNT);
> > +
> > +	debug("Read sysctl values syn_retries: %"PRIu8
> > +	      ", syn_linear_timeouts: %"PRIu8,
> > +	      c->tcp.syn_retries,
> > +	      c->tcp.syn_linear_timeouts);
> > +}
> > +
> >  /**
> >   * tcp_init() - Get initial sequence, hash secret, initialise per-socket data
> >   * @c:		Execution context
> > @@ -2792,6 +2834,8 @@ int tcp_init(struct ctx *c)
> >  {
> >  	ASSERT(!c->no_tcp);
> >  
> > +	tcp_get_rto_params(c);
> > +
> >  	tcp_sock_iov_init(c);
> >  
> >  	memset(init_sock_pool4,		0xff,	sizeof(init_sock_pool4));
> > diff --git a/tcp.h b/tcp.h
> > index 0082386..37d7758 100644
> > --- a/tcp.h
> > +++ b/tcp.h
> > @@ -60,12 +60,16 @@ union tcp_listen_epoll_ref {
> >   * @fwd_out:		Port forwarding configuration for outbound packets
> >   * @timer_run:		Timestamp of most recent timer run
> >   * @pipe_size:		Size of pipes for spliced connections
> > + * @syn_retries:	SYN retries using exponential backoff timeout
> > + * @syn_linear_timeouts: SYN retries before using exponential backoff timeout
> >   */
> >  struct tcp_ctx {
> >  	struct fwd_ports fwd_in;
> >  	struct fwd_ports fwd_out;
> >  	struct timespec timer_run;
> >  	size_t pipe_size;
> > +	uint8_t syn_retries;
> > +	uint8_t syn_linear_timeouts;
> >  };
> >  
> >  #endif /* TCP_H */
> > -- 
> > 2.51.0

-- 
Stefano


^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH v8 3/6] tcp: Add parameter struct ctx *c to tcp_timer_ctl()
  2025-11-10 10:35   ` David Gibson
@ 2025-11-14  0:01     ` Stefano Brivio
  2025-11-14  0:36       ` David Gibson
  0 siblings, 1 reply; 32+ messages in thread
From: Stefano Brivio @ 2025-11-14  0:01 UTC (permalink / raw)
  To: David Gibson; +Cc: Yumei Huang, passt-dev

On Mon, 10 Nov 2025 21:35:24 +1100
David Gibson <david@gibson.dropbear.id.au> wrote:

> On Mon, Nov 10, 2025 at 05:31:34PM +0800, Yumei Huang wrote:
> 
> Should have a commit message explaining that it was recently removed,
> and why you need it back.  Stefano might be able to add that on merge, though.

Even better: this should be part of the patch where tcp_timer_ctl()
needs 'c' again, so that if we are bisecting stuff and end up exactly
after this patch/commit, we don't get spurious static checkers warnings.

But we don't typically check those while bisecting (and I don't see any
good reason to do so), so I don't really care.

Still, yes, mentioning that, say:

---
Commit x ("y") dropped the 'c' parameter to tcp_timer_ctl() but we'll
need it again in the next commit, so add it back.
---

would be nice.

And yes, I can add it myself (even though it doesn't scale much, if I
need to edit a few commit messages and add references for every
series...), but looking at comments to 6/6 I think a respin might be
convenient anyway.

If you merge this change with the next patch you don't really need to
explain it, by the way, as it's obvious from the rest of the commit.

> > Signed-off-by: Yumei Huang <yuhuang@redhat.com>  
> 
> For the code itself,
> 
> Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
> 
> > ---
> >  tcp.c | 15 ++++++++-------
> >  1 file changed, 8 insertions(+), 7 deletions(-)
> > 
> > diff --git a/tcp.c b/tcp.c
> > index ca3d742..2f49327 100644
> > --- a/tcp.c
> > +++ b/tcp.c
> > @@ -543,11 +543,12 @@ static int tcp_epoll_ctl(const struct ctx *c, struct tcp_tap_conn *conn)
> >  
> >  /**
> >   * tcp_timer_ctl() - Set timerfd based on flags/events, create timerfd if needed
> > + * @c:		Execution context
> >   * @conn:	Connection pointer
> >   *
> >   * #syscalls timerfd_create timerfd_settime
> >   */
> > -static void tcp_timer_ctl(struct tcp_tap_conn *conn)
> > +static void tcp_timer_ctl(const struct ctx *c, struct tcp_tap_conn *conn)
> >  {
> >  	struct itimerspec it = { { 0 }, { 0 } };
> >  
> > @@ -631,7 +632,7 @@ void conn_flag_do(const struct ctx *c, struct tcp_tap_conn *conn,
> >  			 * flags and factor this into the logic below.
> >  			 */
> >  			if (flag == ACK_FROM_TAP_DUE)
> > -				tcp_timer_ctl(conn);
> > +				tcp_timer_ctl(c, conn);
> >  
> >  			return;
> >  		}
> > @@ -647,7 +648,7 @@ void conn_flag_do(const struct ctx *c, struct tcp_tap_conn *conn,
> >  	if (flag == ACK_FROM_TAP_DUE || flag == ACK_TO_TAP_DUE		  ||
> >  	    (flag == ~ACK_FROM_TAP_DUE && (conn->flags & ACK_TO_TAP_DUE)) ||
> >  	    (flag == ~ACK_TO_TAP_DUE   && (conn->flags & ACK_FROM_TAP_DUE)))
> > -		tcp_timer_ctl(conn);
> > +		tcp_timer_ctl(c, conn);
> >  }
> >  
> >  /**
> > @@ -702,7 +703,7 @@ void conn_event_do(const struct ctx *c, struct tcp_tap_conn *conn,
> >  		tcp_epoll_ctl(c, conn);
> >  
> >  	if (CONN_HAS(conn, SOCK_FIN_SENT | TAP_FIN_ACKED))
> > -		tcp_timer_ctl(conn);
> > +		tcp_timer_ctl(c, conn);
> >  }
> >  
> >  /**
> > @@ -1770,7 +1771,7 @@ static int tcp_data_from_tap(const struct ctx *c, struct tcp_tap_conn *conn,
> >  				   seq, conn->seq_from_tap);
> >  
> >  			tcp_send_flag(c, conn, ACK);
> > -			tcp_timer_ctl(conn);
> > +			tcp_timer_ctl(c, conn);
> >  
> >  			if (p->count == 1) {
> >  				tcp_tap_window_update(c, conn,
> > @@ -2421,7 +2422,7 @@ void tcp_timer_handler(const struct ctx *c, union epoll_ref ref)
> >  
> >  	if (conn->flags & ACK_TO_TAP_DUE) {
> >  		tcp_send_flag(c, conn, ACK_IF_NEEDED);
> > -		tcp_timer_ctl(conn);
> > +		tcp_timer_ctl(c, conn);
> >  	} else if (conn->flags & ACK_FROM_TAP_DUE) {
> >  		if (!(conn->events & ESTABLISHED)) {
> >  			flow_dbg(conn, "handshake timeout");
> > @@ -2443,7 +2444,7 @@ void tcp_timer_handler(const struct ctx *c, union epoll_ref ref)
> >  				return;
> >  
> >  			tcp_data_from_sock(c, conn);
> > -			tcp_timer_ctl(conn);
> > +			tcp_timer_ctl(c, conn);
> >  		}
> >  	} else {
> >  		struct itimerspec new = { { 0 }, { ACT_TIMEOUT, 0 } };
> > -- 
> > 2.51.0

-- 
Stefano


^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH v8 6/6] tcp: Clamp the retry timeout
  2025-11-10 10:56   ` David Gibson
@ 2025-11-14  0:01     ` Stefano Brivio
  2025-11-14  0:35       ` David Gibson
  0 siblings, 1 reply; 32+ messages in thread
From: Stefano Brivio @ 2025-11-14  0:01 UTC (permalink / raw)
  To: David Gibson; +Cc: Yumei Huang, passt-dev

On Mon, 10 Nov 2025 21:56:39 +1100
David Gibson <david@gibson.dropbear.id.au> wrote:

> On Mon, Nov 10, 2025 at 05:31:37PM +0800, Yumei Huang wrote:
> > Clamp the TCP retry timeout as Linux kernel does. If a retry occurs
> > during the handshake and the RTO is below 3 seconds, re-initialise
> > it to 3 seconds for data retransmissions according to RFC 6298.
> > 
> > Suggested-by: Stefano Brivio <sbrivio@redhat.com>
> > Signed-off-by: Yumei Huang <yuhuang@redhat.com>  
> 
> Looks correct, so
> 
> Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
> 
> A few possible improvements (mostly comments/names) below.
> 
> > ---
> >  tcp.c      | 25 ++++++++++++++++++++-----
> >  tcp.h      |  2 ++
> >  tcp_conn.h |  1 +
> >  3 files changed, 23 insertions(+), 5 deletions(-)
> > 
> > diff --git a/tcp.c b/tcp.c
> > index ee111e0..b015658 100644
> > --- a/tcp.c
> > +++ b/tcp.c
> > @@ -187,6 +187,9 @@
> >   *   for established connections, or (syn_retries + syn_linear_timeouts) times
> >   *   during the handshake, reset the connection
> >   *
> > + * - RTO_INIT_ACK: if the RTO is less than this, re-initialise RTO to this for
> > + *   data retransmissions
> > + *  
> 
> Now that this is conditional on SYN being retried, the description
> doesn't look entirely accurate any more.

If I read the whole thing again I would actually say it becomes
misleading and worth fixing before applying this. Also because the
relationship to the text in the RFC is not obvious anymore.

This should be "if SYN retries happened during handshake, ...".

> >   * - FIN_TIMEOUT: if a FIN segment was sent to tap/guest (flag ACK_FROM_TAP_DUE
> >   *   with TAP_FIN_SENT event), and no ACK is received within this time, reset
> >   *   the connection
> > @@ -340,6 +343,7 @@ enum {
> >  
> >  #define ACK_INTERVAL			10		/* ms */
> >  #define RTO_INIT			1		/* s, RFC 6298 */
> > +#define RTO_INIT_ACK			3		/* s, RFC 6298 */  
> 
> The relevance of "_ACK" in the name is not obvious to me.

What about RTO_INIT_AFTER_SYN_RETRIES? We use it only once, and it
looks fine there.

> 
> >  #define FIN_TIMEOUT			60
> >  #define ACT_TIMEOUT			7200
> >  
> -> @@ -365,9 +369,11 @@ uint8_t tcp_migrate_rcv_queue		[TCP_MIGRATE_RCV_QUEUE_MAX];
> >  
> >  #define SYN_RETRIES		"/proc/sys/net/ipv4/tcp_syn_retries"
> >  #define SYN_LINEAR_TIMEOUTS	"/proc/sys/net/ipv4/tcp_syn_linear_timeouts"
> > +#define RTO_MAX_MS		"/proc/sys/net/ipv4/tcp_rto_max_ms"
> >  
> >  #define SYN_RETRIES_DEFAULT		6
> >  #define SYN_LINEAR_TIMEOUTS_DEFAULT	4
> > +#define RTO_MAX_MS_DEFAULT		120000
> >  #define MAX_SYNCNT			127 /* derived from kernel's limit */
> >  
> >  /* "Extended" data (not stored in the flow table) for TCP flow migration */
> > @@ -392,7 +398,7 @@ static const char *tcp_state_str[] __attribute((__unused__)) = {
> >  
> >  static const char *tcp_flag_str[] __attribute((__unused__)) = {
> >  	"STALLED", "LOCAL", "ACTIVE_CLOSE", "ACK_TO_TAP_DUE",
> > -	"ACK_FROM_TAP_DUE", "ACK_FROM_TAP_BLOCKS",
> > +	"ACK_FROM_TAP_DUE", "ACK_FROM_TAP_BLOCKS", "SYN_RETRIED",
> >  };
> >  
> >  /* Listening sockets, used for automatic port forwarding in pasta mode only */
> > @@ -590,10 +596,13 @@ static void tcp_timer_ctl(const struct ctx *c, struct tcp_tap_conn *conn)
> >  	if (conn->flags & ACK_TO_TAP_DUE) {
> >  		it.it_value.tv_nsec = (long)ACK_INTERVAL * 1000 * 1000;
> >  	} else if (conn->flags & ACK_FROM_TAP_DUE) {
> > -		int exp = conn->retries;
> > +		int exp = conn->retries, timeout = RTO_INIT;
> >  		if (!(conn->events & ESTABLISHED))
> >  			exp -= c->tcp.syn_linear_timeouts;
> > -		it.it_value.tv_sec = RTO_INIT << MAX(exp, 0);
> > +		else if (conn->flags & SYN_RETRIED)
> > +			timeout = MAX(timeout, RTO_INIT_ACK);
> > +		timeout <<= MAX(exp, 0);
> > +		it.it_value.tv_sec = MIN(timeout, c->tcp.rto_max);
> >  	} else if (CONN_HAS(conn, SOCK_FIN_SENT | TAP_FIN_ACKED)) {
> >  		it.it_value.tv_sec = FIN_TIMEOUT;
> >  	} else {
> > @@ -2440,6 +2449,7 @@ void tcp_timer_handler(const struct ctx *c, union epoll_ref ref)
> >  				flow_trace(conn, "SYN timeout, retry");
> >  				tcp_send_flag(c, conn, SYN);
> >  				conn->retries++;
> > +				conn_flag(c, conn, SYN_RETRIED);
> >  				tcp_timer_ctl(c, conn);
> >  			}
> >  		} else if (CONN_HAS(conn, SOCK_FIN_SENT | TAP_FIN_ACKED)) {
> > @@ -2811,10 +2821,15 @@ void tcp_get_rto_params(struct ctx *c)
> >  	v = read_file_integer(SYN_LINEAR_TIMEOUTS, SYN_LINEAR_TIMEOUTS_DEFAULT);
> >  	c->tcp.syn_linear_timeouts = MIN(v, MAX_SYNCNT);
> >  
> > +	v = read_file_integer(RTO_MAX_MS, RTO_MAX_MS_DEFAULT);
> > +	c->tcp.rto_max = MIN(DIV_ROUND_CLOSEST(v, 1000), INT_MAX);  
> 
> Possibly we should verify this is => RTO_INIT.

As a sanity check, maybe, but I don't see any harmful effect if it's
< RTO_INIT, right? So I'm not sure if we should. No preference from my
side really.

> > +
> >  	debug("Read sysctl values syn_retries: %"PRIu8
> > -	      ", syn_linear_timeouts: %"PRIu8,
> > +	      ", syn_linear_timeouts: %"PRIu8
> > +	      ", rto_max: %d",
> >  	      c->tcp.syn_retries,
> > -	      c->tcp.syn_linear_timeouts);
> > +	      c->tcp.syn_linear_timeouts,
> > +	      c->tcp.rto_max);
> >  }
> >  
> >  /**
> > diff --git a/tcp.h b/tcp.h
> > index 37d7758..c4945c3 100644
> > --- a/tcp.h
> > +++ b/tcp.h
> > @@ -60,6 +60,7 @@ union tcp_listen_epoll_ref {
> >   * @fwd_out:		Port forwarding configuration for outbound packets
> >   * @timer_run:		Timestamp of most recent timer run
> >   * @pipe_size:		Size of pipes for spliced connections
> > + * @rto_max:		Maximal retry timeout (in s)

As pointed out earlier, I think "maximum" is slightly more appropriate
here.

> >   * @syn_retries:	SYN retries using exponential backoff timeout
> >   * @syn_linear_timeouts: SYN retries before using exponential backoff timeout
> >   */
> > @@ -68,6 +69,7 @@ struct tcp_ctx {
> >  	struct fwd_ports fwd_out;
> >  	struct timespec timer_run;
> >  	size_t pipe_size;
> > +	int rto_max;
> >  	uint8_t syn_retries;
> >  	uint8_t syn_linear_timeouts;
> >  };
> > diff --git a/tcp_conn.h b/tcp_conn.h
> > index 923af36..e36910c 100644
> > --- a/tcp_conn.h
> > +++ b/tcp_conn.h
> > @@ -77,6 +77,7 @@ struct tcp_tap_conn {
> >  #define ACK_TO_TAP_DUE		BIT(3)
> >  #define ACK_FROM_TAP_DUE	BIT(4)
> >  #define ACK_FROM_TAP_BLOCKS	BIT(5)
> > +#define SYN_RETRIED		BIT(6)
> >  
> >  #define SNDBUF_BITS		24
> >  	unsigned int	sndbuf		:SNDBUF_BITS;
> > -- 
> > 2.51.0

-- 
Stefano


^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH v8 2/6] util: Introduce read_file() and read_file_integer() function
  2025-11-10  9:31 ` [PATCH v8 2/6] util: Introduce read_file() and read_file_integer() function Yumei Huang
@ 2025-11-14  0:01   ` Stefano Brivio
  2025-11-14  1:58     ` Yumei Huang
  2025-11-18  0:19   ` Stefano Brivio
  1 sibling, 1 reply; 32+ messages in thread
From: Stefano Brivio @ 2025-11-14  0:01 UTC (permalink / raw)
  To: Yumei Huang; +Cc: passt-dev, david

On Mon, 10 Nov 2025 17:31:33 +0800
Yumei Huang <yuhuang@redhat.com> wrote:

> Signed-off-by: Yumei Huang <yuhuang@redhat.com>
> Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
> ---
>  util.c | 86 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
>  util.h |  2 ++
>  2 files changed, 88 insertions(+)
> 
> diff --git a/util.c b/util.c
> index 44c21a3..c4c849c 100644
> --- a/util.c
> +++ b/util.c
> @@ -590,6 +590,92 @@ int write_file(const char *path, const char *buf)
>  	return len == 0 ? 0 : -1;
>  }
>  
> +/**
> + * read_file() - Read contents of file into a NULL-terminated buffer
> + * @path:	Path to file to read
> + * @buf:	Buffer to store file contents
> + * @buf_size:	Size of buffer
> + *
> + * Return: number of bytes read on success, -1 on error, -ENOBUFS on truncation
> + */
> +ssize_t read_file(const char *path, char *buf, size_t buf_size)
> +{
> +	int fd = open(path, O_RDONLY | O_CLOEXEC);
> +	size_t total_read = 0;
> +	ssize_t rc;
> +
> +	if (fd < 0) {
> +		warn_perror("Could not open %s", path);
> +		return -1;
> +	}
> +
> +	while (total_read < buf_size) {
> +		rc = read(fd, buf + total_read, buf_size - total_read);

cppcheck rightfully says that:

util.c:604:10: style: The scope of the variable 'rc' can be reduced. [variableScope]
 ssize_t rc;
         ^

> +
> +		if (rc < 0) {
> +			warn_perror("Couldn't read from %s", path);
> +			close(fd);
> +			return -1;
> +		}
> +
> +		if (rc == 0)
> +			break;
> +
> +		total_read += rc;
> +	}
> +
> +	close(fd);
> +
> +	if (total_read == buf_size) {
> +		warn("File %s contents exceed buffer size %zu", path,
> +		     buf_size);
> +		buf[buf_size - 1] = '\0';

I suggested we need this, but Coverity Scan points out that:

---
/home/sbrivio/passt/util.c:631:3:
  Type: Overflowed constant (INTEGER_OVERFLOW)

/home/sbrivio/passt/util.c:606:2:
  1. path: Condition "fd < 0", taking false branch.
/home/sbrivio/passt/util.c:611:2:
  2. path: Condition "total_read < buf_size", taking false branch.
/home/sbrivio/passt/util.c:628:2:
  3. path: Condition "total_read == buf_size", taking true branch.
/home/sbrivio/passt/util.c:631:3:
  4. overflow_const: Expression "buf_size - 1UL", where "buf_size" is known to be equal to 0, underflows the type of "buf_size - 1UL", which is type "unsigned long".
---

in the (faulty) case where somebody calls this with 0 as buf_size.

On the other hand, the passed value of buf_size might be a result of a
wrong calculation, and in that case we don't want to write some
unrelated value on the stack of the caller or smash the stack.

We could ASSERT(buf_size), but in the future we might abuse read_file()
to just check that a file is there and can be read, instead of actually
reading it.

So maybe we could just return (after closing fd) before read() on
!buf_size?

> +		return -ENOBUFS;
> +	}
> +
> +	buf[total_read] = '\0';
> +
> +	return total_read;
> +}
> +
> +/**
> + * read_file_integer() - Read an integer value from a file
> + * @path:	Path to file to read
> + * @fallback:	Default value if file can't be read
> + *
> + * Return: integer value, @fallback on failure
> + */
> +intmax_t read_file_integer(const char *path, intmax_t fallback)
> +{
> +	ssize_t bytes_read;
> +	char buf[BUFSIZ];
> +	intmax_t value;
> +	char *end;
> +
> +	bytes_read = read_file(path, buf, sizeof(buf));
> +
> +	if (bytes_read < 0)
> +		return fallback;
> +
> +	if (bytes_read == 0) {
> +		debug("Empty file %s", path);
> +		return fallback;
> +	}
> +
> +	errno = 0;
> +	value = strtoimax(buf, &end, 10);
> +	if (*end && *end != '\n') {
> +		debug("Non-numeric content in %s", path);
> +		return fallback;
> +	}
> +	if (errno) {
> +		debug("Out of range value in %s: %s", path, buf);
> +		return fallback;
> +	}
> +
> +	return value;
> +}
> +
>  #ifdef __ia64__
>  /* Needed by do_clone() below: glibc doesn't export the prototype of __clone2(),
>   * use the description from clone(2).
> diff --git a/util.h b/util.h
> index a0b2ada..c1502cc 100644
> --- a/util.h
> +++ b/util.h
> @@ -229,6 +229,8 @@ void pidfile_write(int fd, pid_t pid);
>  int __daemon(int pidfile_fd, int devnull_fd);
>  int fls(unsigned long x);
>  int write_file(const char *path, const char *buf);
> +ssize_t read_file(const char *path, char *buf, size_t buf_size);
> +intmax_t read_file_integer(const char *path, intmax_t fallback);
>  int write_all_buf(int fd, const void *buf, size_t len);
>  int write_remainder(int fd, const struct iovec *iov, size_t iovcnt, size_t skip);
>  int read_all_buf(int fd, void *buf, size_t len);

-- 
Stefano


^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH v8 4/6] tcp: Resend SYN for inbound connections
  2025-11-14  0:00     ` Stefano Brivio
@ 2025-11-14  0:31       ` David Gibson
  0 siblings, 0 replies; 32+ messages in thread
From: David Gibson @ 2025-11-14  0:31 UTC (permalink / raw)
  To: Stefano Brivio; +Cc: Yumei Huang, passt-dev

[-- Attachment #1: Type: text/plain, Size: 8887 bytes --]

On Fri, Nov 14, 2025 at 01:00:53AM +0100, Stefano Brivio wrote:
> On Mon, 10 Nov 2025 21:46:55 +1100
> David Gibson <david@gibson.dropbear.id.au> wrote:
> 
> > On Mon, Nov 10, 2025 at 05:31:35PM +0800, Yumei Huang wrote:
> > > If a client connects while guest is not connected or ready yet,
> > > resend SYN instead of just resetting connection after 10 seconds.
> > > 
> > > Use the same backoff calculation for the timeout as Linux kernel.
> > > 
> > > Link: https://bugs.passt.top/show_bug.cgi?id=153
> > > Signed-off-by: Yumei Huang <yuhuang@redhat.com>  
> > 
> > Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
> > 
> > Though I have one small concern remaining
> > 
> > > ---
> > >  tcp.c | 62 ++++++++++++++++++++++++++++++++++++++++++++++++++---------
> > >  tcp.h |  4 ++++
> > >  2 files changed, 57 insertions(+), 9 deletions(-)
> > > 
> > > diff --git a/tcp.c b/tcp.c
> > > index 2f49327..da40a99 100644
> > > --- a/tcp.c
> > > +++ b/tcp.c
> > > @@ -179,9 +179,11 @@
> > >   *
> > >   * Timeouts are implemented by means of timerfd timers, set based on flags:
> > >   *
> > > - * - SYN_TIMEOUT: if no ACK is received from tap/guest during handshake (flag
> > > - *   ACK_FROM_TAP_DUE without ESTABLISHED event) within this time, reset the
> > > - *   connection
> > > + * - SYN_TIMEOUT_INIT: if no SYN,ACK is received from tap/guest during
> > > + *   handshake (flag ACK_FROM_TAP_DUE without ESTABLISHED event) within
> > > + *   this time, resend SYN. It's the starting timeout for the first SYN
> > > + *   retry. Retry for TCP_MAX_RETRIES or (syn_retries + syn_linear_timeouts)
> > > + *   times, reset the connection
> > >   *
> > >   * - ACK_TIMEOUT: if no ACK segment was received from tap/guest, after sending
> > >   *   data (flag ACK_FROM_TAP_DUE with ESTABLISHED event), re-send data from the
> > > @@ -340,7 +342,7 @@ enum {
> > >  #define WINDOW_DEFAULT			14600		/* RFC 6928 */
> > >  
> > >  #define ACK_INTERVAL			10		/* ms */
> > > -#define SYN_TIMEOUT			10		/* s */
> > > +#define SYN_TIMEOUT_INIT		1		/* s, RFC 6928 */
> > >  #define ACK_TIMEOUT			2
> > >  #define FIN_TIMEOUT			60
> > >  #define ACT_TIMEOUT			7200
> > > @@ -365,6 +367,13 @@ uint8_t tcp_migrate_rcv_queue		[TCP_MIGRATE_RCV_QUEUE_MAX];
> > >  
> > >  #define TCP_MIGRATE_RESTORE_CHUNK_MIN	1024 /* Try smaller when above this */
> > >  
> > > +#define SYN_RETRIES		"/proc/sys/net/ipv4/tcp_syn_retries"
> > > +#define SYN_LINEAR_TIMEOUTS	"/proc/sys/net/ipv4/tcp_syn_linear_timeouts"
> > > +
> > > +#define SYN_RETRIES_DEFAULT		6
> > > +#define SYN_LINEAR_TIMEOUTS_DEFAULT	4
> > > +#define MAX_SYNCNT			127 /* derived from kernel's limit */
> > > +
> > >  /* "Extended" data (not stored in the flow table) for TCP flow migration */
> > >  static struct tcp_tap_transfer_ext migrate_ext[FLOW_MAX];
> > >  
> > > @@ -585,10 +594,13 @@ static void tcp_timer_ctl(const struct ctx *c, struct tcp_tap_conn *conn)
> > >  	if (conn->flags & ACK_TO_TAP_DUE) {
> > >  		it.it_value.tv_nsec = (long)ACK_INTERVAL * 1000 * 1000;
> > >  	} else if (conn->flags & ACK_FROM_TAP_DUE) {
> > > -		if (!(conn->events & ESTABLISHED))
> > > -			it.it_value.tv_sec = SYN_TIMEOUT;
> > > -		else
> > > +		if (!(conn->events & ESTABLISHED)) {
> > > +			int exp = conn->retries - c->tcp.syn_linear_timeouts;  
> > 
> > As discussed in the thread on the last version, the subtraction will
> > be done unsigned, so the final result depends on behaviour of casting
> > a "negative" (that is, close to max value) unsigned value into a
> > signed variable.  I'm pretty sure that will do the right thing in
> > practice, but I don't believe that behaviour is guaranteed by the C
> > standard.
> 
> Surprisingly to me, C99 6.2.6.2 "Integer types" admits three
> interpretations of the sign bit for signed types, quoting:
> 
>   — the corresponding value with sign bit 0 is negated (sign and
>     magnitude);
>   — the sign bit has the value −(2N ) (two’s complement);
>   — the sign bit has the value −(2N − 1) (ones’ complement).
> 
> and this, together with the conversion rule from 6.3.1.3 "Signed
> and unsigned integers":
> 
>   Otherwise, the new type is signed and the value cannot be represented
>   in it; either the result is implementation-defined or an
>   implementation-defined signal is raised.
> 
> seems to confirm your interpretation, even though I'm really having
> a hard time convincing myself that we should actually do stuff like:
> 
> 			int exp;
> 
> 			exp = (int)conn->retries - c->tcp.syn_linear_timeouts;
> 
> and I still have the suspicion we're missing something. Is this syntax
> what you're suggesting, by the way?

Yes, that's what I'm suggesting.

> > This probably isn't worth a respin, though.
> 
> Maybe not but:
> 
> > > +			it.it_value.tv_sec = SYN_TIMEOUT_INIT << MAX(exp, 0);
> > > +		}
> > > +		else {
> 
> this is another bit I would need to change. The coding style dictates:
> 
> 		if (x) {
> 			...
> 		} else {
> 			...
> 		}
> 
> *not*:
> 
> 		if (x) {
> 			...
> 		}
> 		else {
> 			...
> 		}
> 
> ...on the other hand, it goes away in 5/6, so I don't care
> particularly.

Right, that was my reasoning too.  Mind you might as well fix it if
there's going to be a respin anyway.

> It still matters a very tiny bit because if we need to revert the
> commit resulting from 5/6 for any reason we'll end up with broken
> coding style. But that's not a realistic scenario and it's very minor
> anyway.
> 
> > >  			it.it_value.tv_sec = ACK_TIMEOUT;
> > > +		}
> > >  	} else if (CONN_HAS(conn, SOCK_FIN_SENT | TAP_FIN_ACKED)) {
> > >  		it.it_value.tv_sec = FIN_TIMEOUT;
> > >  	} else {
> > > @@ -2425,8 +2437,18 @@ void tcp_timer_handler(const struct ctx *c, union epoll_ref ref)
> > >  		tcp_timer_ctl(c, conn);
> > >  	} else if (conn->flags & ACK_FROM_TAP_DUE) {
> > >  		if (!(conn->events & ESTABLISHED)) {
> > > -			flow_dbg(conn, "handshake timeout");
> > > -			tcp_rst(c, conn);
> > > +			int max;
> > > +			max = c->tcp.syn_retries + c->tcp.syn_linear_timeouts;
> > > +			max = MIN(TCP_MAX_RETRIES, max);
> > > +			if (conn->retries >= max) {
> > > +				flow_dbg(conn, "handshake timeout");
> > > +				tcp_rst(c, conn);
> > > +			} else {
> > > +				flow_trace(conn, "SYN timeout, retry");
> > > +				tcp_send_flag(c, conn, SYN);
> > > +				conn->retries++;
> > > +				tcp_timer_ctl(c, conn);
> > > +			}
> > >  		} else if (CONN_HAS(conn, SOCK_FIN_SENT | TAP_FIN_ACKED)) {
> > >  			flow_dbg(conn, "FIN timeout");
> > >  			tcp_rst(c, conn);
> > > @@ -2782,6 +2804,26 @@ static socklen_t tcp_probe_tcp_info(void)
> > >  	return sl;
> > >  }
> > >  
> > > +/**
> > > + * tcp_get_rto_params() - Get host kernel RTO parameters
> > > + * @c:		Execution context
> > > + */
> > > +void tcp_get_rto_params(struct ctx *c)
> > > +{
> > > +	intmax_t v;
> > > +
> > > +	v = read_file_integer(SYN_RETRIES, SYN_RETRIES_DEFAULT);
> > > +	c->tcp.syn_retries = MIN(v, MAX_SYNCNT);
> > > +
> > > +	v = read_file_integer(SYN_LINEAR_TIMEOUTS, SYN_LINEAR_TIMEOUTS_DEFAULT);
> > > +	c->tcp.syn_linear_timeouts = MIN(v, MAX_SYNCNT);
> > > +
> > > +	debug("Read sysctl values syn_retries: %"PRIu8
> > > +	      ", syn_linear_timeouts: %"PRIu8,
> > > +	      c->tcp.syn_retries,
> > > +	      c->tcp.syn_linear_timeouts);
> > > +}
> > > +
> > >  /**
> > >   * tcp_init() - Get initial sequence, hash secret, initialise per-socket data
> > >   * @c:		Execution context
> > > @@ -2792,6 +2834,8 @@ int tcp_init(struct ctx *c)
> > >  {
> > >  	ASSERT(!c->no_tcp);
> > >  
> > > +	tcp_get_rto_params(c);
> > > +
> > >  	tcp_sock_iov_init(c);
> > >  
> > >  	memset(init_sock_pool4,		0xff,	sizeof(init_sock_pool4));
> > > diff --git a/tcp.h b/tcp.h
> > > index 0082386..37d7758 100644
> > > --- a/tcp.h
> > > +++ b/tcp.h
> > > @@ -60,12 +60,16 @@ union tcp_listen_epoll_ref {
> > >   * @fwd_out:		Port forwarding configuration for outbound packets
> > >   * @timer_run:		Timestamp of most recent timer run
> > >   * @pipe_size:		Size of pipes for spliced connections
> > > + * @syn_retries:	SYN retries using exponential backoff timeout
> > > + * @syn_linear_timeouts: SYN retries before using exponential backoff timeout
> > >   */
> > >  struct tcp_ctx {
> > >  	struct fwd_ports fwd_in;
> > >  	struct fwd_ports fwd_out;
> > >  	struct timespec timer_run;
> > >  	size_t pipe_size;
> > > +	uint8_t syn_retries;
> > > +	uint8_t syn_linear_timeouts;
> > >  };
> > >  
> > >  #endif /* TCP_H */
> > > -- 
> > > 2.51.0
> 
> -- 
> Stefano
> 

-- 
David Gibson (he or they)	| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you, not the other way
				| around.
http://www.ozlabs.org/~dgibson

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH v8 6/6] tcp: Clamp the retry timeout
  2025-11-14  0:01     ` Stefano Brivio
@ 2025-11-14  0:35       ` David Gibson
  2025-11-14  3:05         ` Yumei Huang
  0 siblings, 1 reply; 32+ messages in thread
From: David Gibson @ 2025-11-14  0:35 UTC (permalink / raw)
  To: Stefano Brivio; +Cc: Yumei Huang, passt-dev

[-- Attachment #1: Type: text/plain, Size: 7475 bytes --]

On Fri, Nov 14, 2025 at 01:01:21AM +0100, Stefano Brivio wrote:
> On Mon, 10 Nov 2025 21:56:39 +1100
> David Gibson <david@gibson.dropbear.id.au> wrote:
> 
> > On Mon, Nov 10, 2025 at 05:31:37PM +0800, Yumei Huang wrote:
> > > Clamp the TCP retry timeout as Linux kernel does. If a retry occurs
> > > during the handshake and the RTO is below 3 seconds, re-initialise
> > > it to 3 seconds for data retransmissions according to RFC 6298.
> > > 
> > > Suggested-by: Stefano Brivio <sbrivio@redhat.com>
> > > Signed-off-by: Yumei Huang <yuhuang@redhat.com>  
> > 
> > Looks correct, so
> > 
> > Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
> > 
> > A few possible improvements (mostly comments/names) below.
> > 
> > > ---
> > >  tcp.c      | 25 ++++++++++++++++++++-----
> > >  tcp.h      |  2 ++
> > >  tcp_conn.h |  1 +
> > >  3 files changed, 23 insertions(+), 5 deletions(-)
> > > 
> > > diff --git a/tcp.c b/tcp.c
> > > index ee111e0..b015658 100644
> > > --- a/tcp.c
> > > +++ b/tcp.c
> > > @@ -187,6 +187,9 @@
> > >   *   for established connections, or (syn_retries + syn_linear_timeouts) times
> > >   *   during the handshake, reset the connection
> > >   *
> > > + * - RTO_INIT_ACK: if the RTO is less than this, re-initialise RTO to this for
> > > + *   data retransmissions
> > > + *  
> > 
> > Now that this is conditional on SYN being retried, the description
> > doesn't look entirely accurate any more.
> 
> If I read the whole thing again I would actually say it becomes
> misleading and worth fixing before applying this. Also because the
> relationship to the text in the RFC is not obvious anymore.
> 
> This should be "if SYN retries happened during handshake, ...".
> 
> > >   * - FIN_TIMEOUT: if a FIN segment was sent to tap/guest (flag ACK_FROM_TAP_DUE
> > >   *   with TAP_FIN_SENT event), and no ACK is received within this time, reset
> > >   *   the connection
> > > @@ -340,6 +343,7 @@ enum {
> > >  
> > >  #define ACK_INTERVAL			10		/* ms */
> > >  #define RTO_INIT			1		/* s, RFC 6298 */
> > > +#define RTO_INIT_ACK			3		/* s, RFC 6298 */  
> > 
> > The relevance of "_ACK" in the name is not obvious to me.
> 
> What about RTO_INIT_AFTER_SYN_RETRIES? We use it only once, and it
> looks fine there.

Works for me.

> > >  #define FIN_TIMEOUT			60
> > >  #define ACT_TIMEOUT			7200
> > >  
> > -> @@ -365,9 +369,11 @@ uint8_t tcp_migrate_rcv_queue		[TCP_MIGRATE_RCV_QUEUE_MAX];
> > >  
> > >  #define SYN_RETRIES		"/proc/sys/net/ipv4/tcp_syn_retries"
> > >  #define SYN_LINEAR_TIMEOUTS	"/proc/sys/net/ipv4/tcp_syn_linear_timeouts"
> > > +#define RTO_MAX_MS		"/proc/sys/net/ipv4/tcp_rto_max_ms"
> > >  
> > >  #define SYN_RETRIES_DEFAULT		6
> > >  #define SYN_LINEAR_TIMEOUTS_DEFAULT	4
> > > +#define RTO_MAX_MS_DEFAULT		120000
> > >  #define MAX_SYNCNT			127 /* derived from kernel's limit */
> > >  
> > >  /* "Extended" data (not stored in the flow table) for TCP flow migration */
> > > @@ -392,7 +398,7 @@ static const char *tcp_state_str[] __attribute((__unused__)) = {
> > >  
> > >  static const char *tcp_flag_str[] __attribute((__unused__)) = {
> > >  	"STALLED", "LOCAL", "ACTIVE_CLOSE", "ACK_TO_TAP_DUE",
> > > -	"ACK_FROM_TAP_DUE", "ACK_FROM_TAP_BLOCKS",
> > > +	"ACK_FROM_TAP_DUE", "ACK_FROM_TAP_BLOCKS", "SYN_RETRIED",
> > >  };
> > >  
> > >  /* Listening sockets, used for automatic port forwarding in pasta mode only */
> > > @@ -590,10 +596,13 @@ static void tcp_timer_ctl(const struct ctx *c, struct tcp_tap_conn *conn)
> > >  	if (conn->flags & ACK_TO_TAP_DUE) {
> > >  		it.it_value.tv_nsec = (long)ACK_INTERVAL * 1000 * 1000;
> > >  	} else if (conn->flags & ACK_FROM_TAP_DUE) {
> > > -		int exp = conn->retries;
> > > +		int exp = conn->retries, timeout = RTO_INIT;
> > >  		if (!(conn->events & ESTABLISHED))
> > >  			exp -= c->tcp.syn_linear_timeouts;
> > > -		it.it_value.tv_sec = RTO_INIT << MAX(exp, 0);
> > > +		else if (conn->flags & SYN_RETRIED)
> > > +			timeout = MAX(timeout, RTO_INIT_ACK);
> > > +		timeout <<= MAX(exp, 0);
> > > +		it.it_value.tv_sec = MIN(timeout, c->tcp.rto_max);
> > >  	} else if (CONN_HAS(conn, SOCK_FIN_SENT | TAP_FIN_ACKED)) {
> > >  		it.it_value.tv_sec = FIN_TIMEOUT;
> > >  	} else {
> > > @@ -2440,6 +2449,7 @@ void tcp_timer_handler(const struct ctx *c, union epoll_ref ref)
> > >  				flow_trace(conn, "SYN timeout, retry");
> > >  				tcp_send_flag(c, conn, SYN);
> > >  				conn->retries++;
> > > +				conn_flag(c, conn, SYN_RETRIED);
> > >  				tcp_timer_ctl(c, conn);
> > >  			}
> > >  		} else if (CONN_HAS(conn, SOCK_FIN_SENT | TAP_FIN_ACKED)) {
> > > @@ -2811,10 +2821,15 @@ void tcp_get_rto_params(struct ctx *c)
> > >  	v = read_file_integer(SYN_LINEAR_TIMEOUTS, SYN_LINEAR_TIMEOUTS_DEFAULT);
> > >  	c->tcp.syn_linear_timeouts = MIN(v, MAX_SYNCNT);
> > >  
> > > +	v = read_file_integer(RTO_MAX_MS, RTO_MAX_MS_DEFAULT);
> > > +	c->tcp.rto_max = MIN(DIV_ROUND_CLOSEST(v, 1000), INT_MAX);  
> > 
> > Possibly we should verify this is => RTO_INIT.
> 
> As a sanity check, maybe, but I don't see any harmful effect if it's
> < RTO_INIT, right? So I'm not sure if we should. No preference from my
> side really.

Sorry, describing this as >= RTO_INIT was misleading.  What I'm
concerned about here is if the kernel value is set to 400ms, we'll
round it to... 0s.

So, really what I'm concerned about is that we ensure this is > 0.

> > > +
> > >  	debug("Read sysctl values syn_retries: %"PRIu8
> > > -	      ", syn_linear_timeouts: %"PRIu8,
> > > +	      ", syn_linear_timeouts: %"PRIu8
> > > +	      ", rto_max: %d",
> > >  	      c->tcp.syn_retries,
> > > -	      c->tcp.syn_linear_timeouts);
> > > +	      c->tcp.syn_linear_timeouts,
> > > +	      c->tcp.rto_max);
> > >  }
> > >  
> > >  /**
> > > diff --git a/tcp.h b/tcp.h
> > > index 37d7758..c4945c3 100644
> > > --- a/tcp.h
> > > +++ b/tcp.h
> > > @@ -60,6 +60,7 @@ union tcp_listen_epoll_ref {
> > >   * @fwd_out:		Port forwarding configuration for outbound packets
> > >   * @timer_run:		Timestamp of most recent timer run
> > >   * @pipe_size:		Size of pipes for spliced connections
> > > + * @rto_max:		Maximal retry timeout (in s)
> 
> As pointed out earlier, I think "maximum" is slightly more appropriate
> here.

Agreed, with nuance discussed earlier.

> 
> > >   * @syn_retries:	SYN retries using exponential backoff timeout
> > >   * @syn_linear_timeouts: SYN retries before using exponential backoff timeout
> > >   */
> > > @@ -68,6 +69,7 @@ struct tcp_ctx {
> > >  	struct fwd_ports fwd_out;
> > >  	struct timespec timer_run;
> > >  	size_t pipe_size;
> > > +	int rto_max;
> > >  	uint8_t syn_retries;
> > >  	uint8_t syn_linear_timeouts;
> > >  };
> > > diff --git a/tcp_conn.h b/tcp_conn.h
> > > index 923af36..e36910c 100644
> > > --- a/tcp_conn.h
> > > +++ b/tcp_conn.h
> > > @@ -77,6 +77,7 @@ struct tcp_tap_conn {
> > >  #define ACK_TO_TAP_DUE		BIT(3)
> > >  #define ACK_FROM_TAP_DUE	BIT(4)
> > >  #define ACK_FROM_TAP_BLOCKS	BIT(5)
> > > +#define SYN_RETRIED		BIT(6)
> > >  
> > >  #define SNDBUF_BITS		24
> > >  	unsigned int	sndbuf		:SNDBUF_BITS;
> > > -- 
> > > 2.51.0
> 
> -- 
> Stefano
> 

-- 
David Gibson (he or they)	| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you, not the other way
				| around.
http://www.ozlabs.org/~dgibson

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH v8 3/6] tcp: Add parameter struct ctx *c to tcp_timer_ctl()
  2025-11-14  0:01     ` Stefano Brivio
@ 2025-11-14  0:36       ` David Gibson
  0 siblings, 0 replies; 32+ messages in thread
From: David Gibson @ 2025-11-14  0:36 UTC (permalink / raw)
  To: Stefano Brivio; +Cc: Yumei Huang, passt-dev

[-- Attachment #1: Type: text/plain, Size: 4749 bytes --]

On Fri, Nov 14, 2025 at 01:01:12AM +0100, Stefano Brivio wrote:
> On Mon, 10 Nov 2025 21:35:24 +1100
> David Gibson <david@gibson.dropbear.id.au> wrote:
> 
> > On Mon, Nov 10, 2025 at 05:31:34PM +0800, Yumei Huang wrote:
> > 
> > Should have a commit message explaining that it was recently removed,
> > and why you need it back.  Stefano might be able to add that on merge, though.
> 
> Even better: this should be part of the patch where tcp_timer_ctl()
> needs 'c' again, so that if we are bisecting stuff and end up exactly
> after this patch/commit, we don't get spurious static checkers warnings.
> 
> But we don't typically check those while bisecting (and I don't see any
> good reason to do so), so I don't really care.
> 
> Still, yes, mentioning that, say:
> 
> ---
> Commit x ("y") dropped the 'c' parameter to tcp_timer_ctl() but we'll
> need it again in the next commit, so add it back.
> ---
> 
> would be nice.
> 
> And yes, I can add it myself (even though it doesn't scale much, if I
> need to edit a few commit messages and add references for every
> series...), but looking at comments to 6/6 I think a respin might be
> convenient anyway.

Right.  Adjusting on merge would only make sense if there was nothing
else to polish in a respin.

> If you merge this change with the next patch you don't really need to
> explain it, by the way, as it's obvious from the rest of the commit.
> 
> > > Signed-off-by: Yumei Huang <yuhuang@redhat.com>  
> > 
> > For the code itself,
> > 
> > Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
> > 
> > > ---
> > >  tcp.c | 15 ++++++++-------
> > >  1 file changed, 8 insertions(+), 7 deletions(-)
> > > 
> > > diff --git a/tcp.c b/tcp.c
> > > index ca3d742..2f49327 100644
> > > --- a/tcp.c
> > > +++ b/tcp.c
> > > @@ -543,11 +543,12 @@ static int tcp_epoll_ctl(const struct ctx *c, struct tcp_tap_conn *conn)
> > >  
> > >  /**
> > >   * tcp_timer_ctl() - Set timerfd based on flags/events, create timerfd if needed
> > > + * @c:		Execution context
> > >   * @conn:	Connection pointer
> > >   *
> > >   * #syscalls timerfd_create timerfd_settime
> > >   */
> > > -static void tcp_timer_ctl(struct tcp_tap_conn *conn)
> > > +static void tcp_timer_ctl(const struct ctx *c, struct tcp_tap_conn *conn)
> > >  {
> > >  	struct itimerspec it = { { 0 }, { 0 } };
> > >  
> > > @@ -631,7 +632,7 @@ void conn_flag_do(const struct ctx *c, struct tcp_tap_conn *conn,
> > >  			 * flags and factor this into the logic below.
> > >  			 */
> > >  			if (flag == ACK_FROM_TAP_DUE)
> > > -				tcp_timer_ctl(conn);
> > > +				tcp_timer_ctl(c, conn);
> > >  
> > >  			return;
> > >  		}
> > > @@ -647,7 +648,7 @@ void conn_flag_do(const struct ctx *c, struct tcp_tap_conn *conn,
> > >  	if (flag == ACK_FROM_TAP_DUE || flag == ACK_TO_TAP_DUE		  ||
> > >  	    (flag == ~ACK_FROM_TAP_DUE && (conn->flags & ACK_TO_TAP_DUE)) ||
> > >  	    (flag == ~ACK_TO_TAP_DUE   && (conn->flags & ACK_FROM_TAP_DUE)))
> > > -		tcp_timer_ctl(conn);
> > > +		tcp_timer_ctl(c, conn);
> > >  }
> > >  
> > >  /**
> > > @@ -702,7 +703,7 @@ void conn_event_do(const struct ctx *c, struct tcp_tap_conn *conn,
> > >  		tcp_epoll_ctl(c, conn);
> > >  
> > >  	if (CONN_HAS(conn, SOCK_FIN_SENT | TAP_FIN_ACKED))
> > > -		tcp_timer_ctl(conn);
> > > +		tcp_timer_ctl(c, conn);
> > >  }
> > >  
> > >  /**
> > > @@ -1770,7 +1771,7 @@ static int tcp_data_from_tap(const struct ctx *c, struct tcp_tap_conn *conn,
> > >  				   seq, conn->seq_from_tap);
> > >  
> > >  			tcp_send_flag(c, conn, ACK);
> > > -			tcp_timer_ctl(conn);
> > > +			tcp_timer_ctl(c, conn);
> > >  
> > >  			if (p->count == 1) {
> > >  				tcp_tap_window_update(c, conn,
> > > @@ -2421,7 +2422,7 @@ void tcp_timer_handler(const struct ctx *c, union epoll_ref ref)
> > >  
> > >  	if (conn->flags & ACK_TO_TAP_DUE) {
> > >  		tcp_send_flag(c, conn, ACK_IF_NEEDED);
> > > -		tcp_timer_ctl(conn);
> > > +		tcp_timer_ctl(c, conn);
> > >  	} else if (conn->flags & ACK_FROM_TAP_DUE) {
> > >  		if (!(conn->events & ESTABLISHED)) {
> > >  			flow_dbg(conn, "handshake timeout");
> > > @@ -2443,7 +2444,7 @@ void tcp_timer_handler(const struct ctx *c, union epoll_ref ref)
> > >  				return;
> > >  
> > >  			tcp_data_from_sock(c, conn);
> > > -			tcp_timer_ctl(conn);
> > > +			tcp_timer_ctl(c, conn);
> > >  		}
> > >  	} else {
> > >  		struct itimerspec new = { { 0 }, { ACT_TIMEOUT, 0 } };
> > > -- 
> > > 2.51.0
> 
> -- 
> Stefano
> 

-- 
David Gibson (he or they)	| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you, not the other way
				| around.
http://www.ozlabs.org/~dgibson

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH v8 2/6] util: Introduce read_file() and read_file_integer() function
  2025-11-14  0:01   ` Stefano Brivio
@ 2025-11-14  1:58     ` Yumei Huang
  2025-11-14  4:32       ` David Gibson
  2025-11-14 10:12       ` Stefano Brivio
  0 siblings, 2 replies; 32+ messages in thread
From: Yumei Huang @ 2025-11-14  1:58 UTC (permalink / raw)
  To: Stefano Brivio; +Cc: passt-dev, david

On Fri, Nov 14, 2025 at 8:01 AM Stefano Brivio <sbrivio@redhat.com> wrote:
>
> On Mon, 10 Nov 2025 17:31:33 +0800
> Yumei Huang <yuhuang@redhat.com> wrote:
>
> > Signed-off-by: Yumei Huang <yuhuang@redhat.com>
> > Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
> > ---
> >  util.c | 86 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
> >  util.h |  2 ++
> >  2 files changed, 88 insertions(+)
> >
> > diff --git a/util.c b/util.c
> > index 44c21a3..c4c849c 100644
> > --- a/util.c
> > +++ b/util.c
> > @@ -590,6 +590,92 @@ int write_file(const char *path, const char *buf)
> >       return len == 0 ? 0 : -1;
> >  }
> >
> > +/**
> > + * read_file() - Read contents of file into a NULL-terminated buffer
> > + * @path:    Path to file to read
> > + * @buf:     Buffer to store file contents
> > + * @buf_size:        Size of buffer
> > + *
> > + * Return: number of bytes read on success, -1 on error, -ENOBUFS on truncation
> > + */
> > +ssize_t read_file(const char *path, char *buf, size_t buf_size)
> > +{
> > +     int fd = open(path, O_RDONLY | O_CLOEXEC);
> > +     size_t total_read = 0;
> > +     ssize_t rc;
> > +
> > +     if (fd < 0) {
> > +             warn_perror("Could not open %s", path);
> > +             return -1;
> > +     }
> > +
> > +     while (total_read < buf_size) {
> > +             rc = read(fd, buf + total_read, buf_size - total_read);
>
> cppcheck rightfully says that:
>
> util.c:604:10: style: The scope of the variable 'rc' can be reduced. [variableScope]
>  ssize_t rc;
>          ^

Right.
Seems it also says:

tcp.c:2814:0: style: The function 'tcp_get_rto_params' should have
static linkage since it is not used outside of its translation unit.
[staticFunction]
void tcp_get_rto_params(struct ctx *c)
^
util.c:601:0: style: The function 'read_file' should have static
linkage since it is not used outside of its translation unit.
[staticFunction]
ssize_t read_file(const char *path, char *buf, size_t buf_size)

I understand read_file() may be called from other places in the
future. But do we need to add static now? I guess we need it for
tcp_get_rto_params().
>
> > +
> > +             if (rc < 0) {
> > +                     warn_perror("Couldn't read from %s", path);
> > +                     close(fd);
> > +                     return -1;
> > +             }
> > +
> > +             if (rc == 0)
> > +                     break;
> > +
> > +             total_read += rc;
> > +     }
> > +
> > +     close(fd);
> > +
> > +     if (total_read == buf_size) {
> > +             warn("File %s contents exceed buffer size %zu", path,
> > +                  buf_size);
> > +             buf[buf_size - 1] = '\0';
>
> I suggested we need this, but Coverity Scan points out that:
>
> ---
> /home/sbrivio/passt/util.c:631:3:
>   Type: Overflowed constant (INTEGER_OVERFLOW)
>
> /home/sbrivio/passt/util.c:606:2:
>   1. path: Condition "fd < 0", taking false branch.
> /home/sbrivio/passt/util.c:611:2:
>   2. path: Condition "total_read < buf_size", taking false branch.
> /home/sbrivio/passt/util.c:628:2:
>   3. path: Condition "total_read == buf_size", taking true branch.
> /home/sbrivio/passt/util.c:631:3:
>   4. overflow_const: Expression "buf_size - 1UL", where "buf_size" is known to be equal to 0, underflows the type of "buf_size - 1UL", which is type "unsigned long".
> ---

Somehow my Coverity Scan didn't complain about that.
>
> in the (faulty) case where somebody calls this with 0 as buf_size.
>
> On the other hand, the passed value of buf_size might be a result of a
> wrong calculation, and in that case we don't want to write some
> unrelated value on the stack of the caller or smash the stack.
>
> We could ASSERT(buf_size), but in the future we might abuse read_file()
> to just check that a file is there and can be read, instead of actually
> reading it.
>
> So maybe we could just return (after closing fd) before read() on
> !buf_size?

Sure. I can add that.
>
> > +             return -ENOBUFS;
> > +     }
> > +
> > +     buf[total_read] = '\0';
> > +
> > +     return total_read;
> > +}
> > +
> > +/**
> > + * read_file_integer() - Read an integer value from a file
> > + * @path:    Path to file to read
> > + * @fallback:        Default value if file can't be read
> > + *
> > + * Return: integer value, @fallback on failure
> > + */
> > +intmax_t read_file_integer(const char *path, intmax_t fallback)
> > +{
> > +     ssize_t bytes_read;
> > +     char buf[BUFSIZ];
> > +     intmax_t value;
> > +     char *end;
> > +
> > +     bytes_read = read_file(path, buf, sizeof(buf));
> > +
> > +     if (bytes_read < 0)
> > +             return fallback;
> > +
> > +     if (bytes_read == 0) {
> > +             debug("Empty file %s", path);
> > +             return fallback;
> > +     }
> > +
> > +     errno = 0;
> > +     value = strtoimax(buf, &end, 10);
> > +     if (*end && *end != '\n') {
> > +             debug("Non-numeric content in %s", path);
> > +             return fallback;
> > +     }
> > +     if (errno) {
> > +             debug("Out of range value in %s: %s", path, buf);
> > +             return fallback;
> > +     }
> > +
> > +     return value;
> > +}
> > +
> >  #ifdef __ia64__
> >  /* Needed by do_clone() below: glibc doesn't export the prototype of __clone2(),
> >   * use the description from clone(2).
> > diff --git a/util.h b/util.h
> > index a0b2ada..c1502cc 100644
> > --- a/util.h
> > +++ b/util.h
> > @@ -229,6 +229,8 @@ void pidfile_write(int fd, pid_t pid);
> >  int __daemon(int pidfile_fd, int devnull_fd);
> >  int fls(unsigned long x);
> >  int write_file(const char *path, const char *buf);
> > +ssize_t read_file(const char *path, char *buf, size_t buf_size);
> > +intmax_t read_file_integer(const char *path, intmax_t fallback);
> >  int write_all_buf(int fd, const void *buf, size_t len);
> >  int write_remainder(int fd, const struct iovec *iov, size_t iovcnt, size_t skip);
> >  int read_all_buf(int fd, void *buf, size_t len);
>
> --
> Stefano
>


-- 
Thanks,

Yumei Huang


^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH v8 6/6] tcp: Clamp the retry timeout
  2025-11-14  0:35       ` David Gibson
@ 2025-11-14  3:05         ` Yumei Huang
  2025-11-14  3:35           ` David Gibson
  0 siblings, 1 reply; 32+ messages in thread
From: Yumei Huang @ 2025-11-14  3:05 UTC (permalink / raw)
  To: David Gibson; +Cc: Stefano Brivio, passt-dev

On Fri, Nov 14, 2025 at 8:47 AM David Gibson
<david@gibson.dropbear.id.au> wrote:
>
> On Fri, Nov 14, 2025 at 01:01:21AM +0100, Stefano Brivio wrote:
> > On Mon, 10 Nov 2025 21:56:39 +1100
> > David Gibson <david@gibson.dropbear.id.au> wrote:
> >
> > > On Mon, Nov 10, 2025 at 05:31:37PM +0800, Yumei Huang wrote:
> > > > Clamp the TCP retry timeout as Linux kernel does. If a retry occurs
> > > > during the handshake and the RTO is below 3 seconds, re-initialise
> > > > it to 3 seconds for data retransmissions according to RFC 6298.
> > > >
> > > > Suggested-by: Stefano Brivio <sbrivio@redhat.com>
> > > > Signed-off-by: Yumei Huang <yuhuang@redhat.com>
> > >
> > > Looks correct, so
> > >
> > > Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
> > >
> > > A few possible improvements (mostly comments/names) below.
> > >
> > > > ---
> > > >  tcp.c      | 25 ++++++++++++++++++++-----
> > > >  tcp.h      |  2 ++
> > > >  tcp_conn.h |  1 +
> > > >  3 files changed, 23 insertions(+), 5 deletions(-)
> > > >
> > > > diff --git a/tcp.c b/tcp.c
> > > > index ee111e0..b015658 100644
> > > > --- a/tcp.c
> > > > +++ b/tcp.c
> > > > @@ -187,6 +187,9 @@
> > > >   *   for established connections, or (syn_retries + syn_linear_timeouts) times
> > > >   *   during the handshake, reset the connection
> > > >   *
> > > > + * - RTO_INIT_ACK: if the RTO is less than this, re-initialise RTO to this for
> > > > + *   data retransmissions
> > > > + *
> > >
> > > Now that this is conditional on SYN being retried, the description
> > > doesn't look entirely accurate any more.
> >
> > If I read the whole thing again I would actually say it becomes
> > misleading and worth fixing before applying this. Also because the
> > relationship to the text in the RFC is not obvious anymore.
> >
> > This should be "if SYN retries happened during handshake, ...".
> >
> > > >   * - FIN_TIMEOUT: if a FIN segment was sent to tap/guest (flag ACK_FROM_TAP_DUE
> > > >   *   with TAP_FIN_SENT event), and no ACK is received within this time, reset
> > > >   *   the connection
> > > > @@ -340,6 +343,7 @@ enum {
> > > >
> > > >  #define ACK_INTERVAL                     10              /* ms */
> > > >  #define RTO_INIT                 1               /* s, RFC 6298 */
> > > > +#define RTO_INIT_ACK                     3               /* s, RFC 6298 */
> > >
> > > The relevance of "_ACK" in the name is not obvious to me.
> >
> > What about RTO_INIT_AFTER_SYN_RETRIES? We use it only once, and it
> > looks fine there.
>
> Works for me.
>
> > > >  #define FIN_TIMEOUT                      60
> > > >  #define ACT_TIMEOUT                      7200
> > > >
> > > -> @@ -365,9 +369,11 @@ uint8_t tcp_migrate_rcv_queue               [TCP_MIGRATE_RCV_QUEUE_MAX];
> > > >
> > > >  #define SYN_RETRIES              "/proc/sys/net/ipv4/tcp_syn_retries"
> > > >  #define SYN_LINEAR_TIMEOUTS      "/proc/sys/net/ipv4/tcp_syn_linear_timeouts"
> > > > +#define RTO_MAX_MS               "/proc/sys/net/ipv4/tcp_rto_max_ms"
> > > >
> > > >  #define SYN_RETRIES_DEFAULT              6
> > > >  #define SYN_LINEAR_TIMEOUTS_DEFAULT      4
> > > > +#define RTO_MAX_MS_DEFAULT               120000
> > > >  #define MAX_SYNCNT                       127 /* derived from kernel's limit */
> > > >
> > > >  /* "Extended" data (not stored in the flow table) for TCP flow migration */
> > > > @@ -392,7 +398,7 @@ static const char *tcp_state_str[] __attribute((__unused__)) = {
> > > >
> > > >  static const char *tcp_flag_str[] __attribute((__unused__)) = {
> > > >   "STALLED", "LOCAL", "ACTIVE_CLOSE", "ACK_TO_TAP_DUE",
> > > > - "ACK_FROM_TAP_DUE", "ACK_FROM_TAP_BLOCKS",
> > > > + "ACK_FROM_TAP_DUE", "ACK_FROM_TAP_BLOCKS", "SYN_RETRIED",
> > > >  };
> > > >
> > > >  /* Listening sockets, used for automatic port forwarding in pasta mode only */
> > > > @@ -590,10 +596,13 @@ static void tcp_timer_ctl(const struct ctx *c, struct tcp_tap_conn *conn)
> > > >   if (conn->flags & ACK_TO_TAP_DUE) {
> > > >           it.it_value.tv_nsec = (long)ACK_INTERVAL * 1000 * 1000;
> > > >   } else if (conn->flags & ACK_FROM_TAP_DUE) {
> > > > -         int exp = conn->retries;
> > > > +         int exp = conn->retries, timeout = RTO_INIT;
> > > >           if (!(conn->events & ESTABLISHED))
> > > >                   exp -= c->tcp.syn_linear_timeouts;
> > > > -         it.it_value.tv_sec = RTO_INIT << MAX(exp, 0);
> > > > +         else if (conn->flags & SYN_RETRIED)
> > > > +                 timeout = MAX(timeout, RTO_INIT_ACK);
> > > > +         timeout <<= MAX(exp, 0);
> > > > +         it.it_value.tv_sec = MIN(timeout, c->tcp.rto_max);
> > > >   } else if (CONN_HAS(conn, SOCK_FIN_SENT | TAP_FIN_ACKED)) {
> > > >           it.it_value.tv_sec = FIN_TIMEOUT;
> > > >   } else {
> > > > @@ -2440,6 +2449,7 @@ void tcp_timer_handler(const struct ctx *c, union epoll_ref ref)
> > > >                           flow_trace(conn, "SYN timeout, retry");
> > > >                           tcp_send_flag(c, conn, SYN);
> > > >                           conn->retries++;
> > > > +                         conn_flag(c, conn, SYN_RETRIED);
> > > >                           tcp_timer_ctl(c, conn);
> > > >                   }
> > > >           } else if (CONN_HAS(conn, SOCK_FIN_SENT | TAP_FIN_ACKED)) {
> > > > @@ -2811,10 +2821,15 @@ void tcp_get_rto_params(struct ctx *c)
> > > >   v = read_file_integer(SYN_LINEAR_TIMEOUTS, SYN_LINEAR_TIMEOUTS_DEFAULT);
> > > >   c->tcp.syn_linear_timeouts = MIN(v, MAX_SYNCNT);
> > > >
> > > > + v = read_file_integer(RTO_MAX_MS, RTO_MAX_MS_DEFAULT);
> > > > + c->tcp.rto_max = MIN(DIV_ROUND_CLOSEST(v, 1000), INT_MAX);
> > >
> > > Possibly we should verify this is => RTO_INIT.
> >
> > As a sanity check, maybe, but I don't see any harmful effect if it's
> > < RTO_INIT, right? So I'm not sure if we should. No preference from my
> > side really.
>
> Sorry, describing this as >= RTO_INIT was misleading.  What I'm
> concerned about here is if the kernel value is set to 400ms, we'll
> round it to... 0s.
>
> So, really what I'm concerned about is that we ensure this is > 0.

That's a good point.
>
> > > > +
> > > >   debug("Read sysctl values syn_retries: %"PRIu8
> > > > -       ", syn_linear_timeouts: %"PRIu8,
> > > > +       ", syn_linear_timeouts: %"PRIu8
> > > > +       ", rto_max: %d",
> > > >         c->tcp.syn_retries,
> > > > -       c->tcp.syn_linear_timeouts);
> > > > +       c->tcp.syn_linear_timeouts,
> > > > +       c->tcp.rto_max);
> > > >  }
> > > >
> > > >  /**
> > > > diff --git a/tcp.h b/tcp.h
> > > > index 37d7758..c4945c3 100644
> > > > --- a/tcp.h
> > > > +++ b/tcp.h
> > > > @@ -60,6 +60,7 @@ union tcp_listen_epoll_ref {
> > > >   * @fwd_out:             Port forwarding configuration for outbound packets
> > > >   * @timer_run:           Timestamp of most recent timer run
> > > >   * @pipe_size:           Size of pipes for spliced connections
> > > > + * @rto_max:             Maximal retry timeout (in s)
> >
> > As pointed out earlier, I think "maximum" is slightly more appropriate
> > here.
>
> Agreed, with nuance discussed earlier.

Well, I must have misunderstood something in last week's meeting. I
joined the second half meeting a few mins later and I caught Stefano
saying he was wrong about maximal. Probably he was talking about the
French meaning. Anyway, I will update.
>
> >
> > > >   * @syn_retries: SYN retries using exponential backoff timeout
> > > >   * @syn_linear_timeouts: SYN retries before using exponential backoff timeout
> > > >   */
> > > > @@ -68,6 +69,7 @@ struct tcp_ctx {
> > > >   struct fwd_ports fwd_out;
> > > >   struct timespec timer_run;
> > > >   size_t pipe_size;
> > > > + int rto_max;
> > > >   uint8_t syn_retries;
> > > >   uint8_t syn_linear_timeouts;
> > > >  };
> > > > diff --git a/tcp_conn.h b/tcp_conn.h
> > > > index 923af36..e36910c 100644
> > > > --- a/tcp_conn.h
> > > > +++ b/tcp_conn.h
> > > > @@ -77,6 +77,7 @@ struct tcp_tap_conn {
> > > >  #define ACK_TO_TAP_DUE           BIT(3)
> > > >  #define ACK_FROM_TAP_DUE BIT(4)
> > > >  #define ACK_FROM_TAP_BLOCKS      BIT(5)
> > > > +#define SYN_RETRIED              BIT(6)
> > > >
> > > >  #define SNDBUF_BITS              24
> > > >   unsigned int    sndbuf          :SNDBUF_BITS;
> > > > --
> > > > 2.51.0
> >
> > --
> > Stefano
> >
>
> --
> David Gibson (he or they)       | I'll have my music baroque, and my code
> david AT gibson.dropbear.id.au  | minimalist, thank you, not the other way
>                                 | around.
> http://www.ozlabs.org/~dgibson



-- 
Thanks,

Yumei Huang


^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH v8 6/6] tcp: Clamp the retry timeout
  2025-11-14  3:05         ` Yumei Huang
@ 2025-11-14  3:35           ` David Gibson
  2025-11-17  2:38             ` Yumei Huang
  2025-11-18  0:19             ` Stefano Brivio
  0 siblings, 2 replies; 32+ messages in thread
From: David Gibson @ 2025-11-14  3:35 UTC (permalink / raw)
  To: Yumei Huang; +Cc: Stefano Brivio, passt-dev

[-- Attachment #1: Type: text/plain, Size: 9847 bytes --]

On Fri, Nov 14, 2025 at 11:05:51AM +0800, Yumei Huang wrote:
> On Fri, Nov 14, 2025 at 8:47 AM David Gibson
> <david@gibson.dropbear.id.au> wrote:
> >
> > On Fri, Nov 14, 2025 at 01:01:21AM +0100, Stefano Brivio wrote:
> > > On Mon, 10 Nov 2025 21:56:39 +1100
> > > David Gibson <david@gibson.dropbear.id.au> wrote:
> > >
> > > > On Mon, Nov 10, 2025 at 05:31:37PM +0800, Yumei Huang wrote:
> > > > > Clamp the TCP retry timeout as Linux kernel does. If a retry occurs
> > > > > during the handshake and the RTO is below 3 seconds, re-initialise
> > > > > it to 3 seconds for data retransmissions according to RFC 6298.
> > > > >
> > > > > Suggested-by: Stefano Brivio <sbrivio@redhat.com>
> > > > > Signed-off-by: Yumei Huang <yuhuang@redhat.com>
> > > >
> > > > Looks correct, so
> > > >
> > > > Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
> > > >
> > > > A few possible improvements (mostly comments/names) below.
> > > >
> > > > > ---
> > > > >  tcp.c      | 25 ++++++++++++++++++++-----
> > > > >  tcp.h      |  2 ++
> > > > >  tcp_conn.h |  1 +
> > > > >  3 files changed, 23 insertions(+), 5 deletions(-)
> > > > >
> > > > > diff --git a/tcp.c b/tcp.c
> > > > > index ee111e0..b015658 100644
> > > > > --- a/tcp.c
> > > > > +++ b/tcp.c
> > > > > @@ -187,6 +187,9 @@
> > > > >   *   for established connections, or (syn_retries + syn_linear_timeouts) times
> > > > >   *   during the handshake, reset the connection
> > > > >   *
> > > > > + * - RTO_INIT_ACK: if the RTO is less than this, re-initialise RTO to this for
> > > > > + *   data retransmissions
> > > > > + *
> > > >
> > > > Now that this is conditional on SYN being retried, the description
> > > > doesn't look entirely accurate any more.
> > >
> > > If I read the whole thing again I would actually say it becomes
> > > misleading and worth fixing before applying this. Also because the
> > > relationship to the text in the RFC is not obvious anymore.
> > >
> > > This should be "if SYN retries happened during handshake, ...".
> > >
> > > > >   * - FIN_TIMEOUT: if a FIN segment was sent to tap/guest (flag ACK_FROM_TAP_DUE
> > > > >   *   with TAP_FIN_SENT event), and no ACK is received within this time, reset
> > > > >   *   the connection
> > > > > @@ -340,6 +343,7 @@ enum {
> > > > >
> > > > >  #define ACK_INTERVAL                     10              /* ms */
> > > > >  #define RTO_INIT                 1               /* s, RFC 6298 */
> > > > > +#define RTO_INIT_ACK                     3               /* s, RFC 6298 */
> > > >
> > > > The relevance of "_ACK" in the name is not obvious to me.
> > >
> > > What about RTO_INIT_AFTER_SYN_RETRIES? We use it only once, and it
> > > looks fine there.
> >
> > Works for me.
> >
> > > > >  #define FIN_TIMEOUT                      60
> > > > >  #define ACT_TIMEOUT                      7200
> > > > >
> > > > -> @@ -365,9 +369,11 @@ uint8_t tcp_migrate_rcv_queue               [TCP_MIGRATE_RCV_QUEUE_MAX];
> > > > >
> > > > >  #define SYN_RETRIES              "/proc/sys/net/ipv4/tcp_syn_retries"
> > > > >  #define SYN_LINEAR_TIMEOUTS      "/proc/sys/net/ipv4/tcp_syn_linear_timeouts"
> > > > > +#define RTO_MAX_MS               "/proc/sys/net/ipv4/tcp_rto_max_ms"
> > > > >
> > > > >  #define SYN_RETRIES_DEFAULT              6
> > > > >  #define SYN_LINEAR_TIMEOUTS_DEFAULT      4
> > > > > +#define RTO_MAX_MS_DEFAULT               120000
> > > > >  #define MAX_SYNCNT                       127 /* derived from kernel's limit */
> > > > >
> > > > >  /* "Extended" data (not stored in the flow table) for TCP flow migration */
> > > > > @@ -392,7 +398,7 @@ static const char *tcp_state_str[] __attribute((__unused__)) = {
> > > > >
> > > > >  static const char *tcp_flag_str[] __attribute((__unused__)) = {
> > > > >   "STALLED", "LOCAL", "ACTIVE_CLOSE", "ACK_TO_TAP_DUE",
> > > > > - "ACK_FROM_TAP_DUE", "ACK_FROM_TAP_BLOCKS",
> > > > > + "ACK_FROM_TAP_DUE", "ACK_FROM_TAP_BLOCKS", "SYN_RETRIED",
> > > > >  };
> > > > >
> > > > >  /* Listening sockets, used for automatic port forwarding in pasta mode only */
> > > > > @@ -590,10 +596,13 @@ static void tcp_timer_ctl(const struct ctx *c, struct tcp_tap_conn *conn)
> > > > >   if (conn->flags & ACK_TO_TAP_DUE) {
> > > > >           it.it_value.tv_nsec = (long)ACK_INTERVAL * 1000 * 1000;
> > > > >   } else if (conn->flags & ACK_FROM_TAP_DUE) {
> > > > > -         int exp = conn->retries;
> > > > > +         int exp = conn->retries, timeout = RTO_INIT;
> > > > >           if (!(conn->events & ESTABLISHED))
> > > > >                   exp -= c->tcp.syn_linear_timeouts;
> > > > > -         it.it_value.tv_sec = RTO_INIT << MAX(exp, 0);
> > > > > +         else if (conn->flags & SYN_RETRIED)
> > > > > +                 timeout = MAX(timeout, RTO_INIT_ACK);
> > > > > +         timeout <<= MAX(exp, 0);
> > > > > +         it.it_value.tv_sec = MIN(timeout, c->tcp.rto_max);
> > > > >   } else if (CONN_HAS(conn, SOCK_FIN_SENT | TAP_FIN_ACKED)) {
> > > > >           it.it_value.tv_sec = FIN_TIMEOUT;
> > > > >   } else {
> > > > > @@ -2440,6 +2449,7 @@ void tcp_timer_handler(const struct ctx *c, union epoll_ref ref)
> > > > >                           flow_trace(conn, "SYN timeout, retry");
> > > > >                           tcp_send_flag(c, conn, SYN);
> > > > >                           conn->retries++;
> > > > > +                         conn_flag(c, conn, SYN_RETRIED);
> > > > >                           tcp_timer_ctl(c, conn);
> > > > >                   }
> > > > >           } else if (CONN_HAS(conn, SOCK_FIN_SENT | TAP_FIN_ACKED)) {
> > > > > @@ -2811,10 +2821,15 @@ void tcp_get_rto_params(struct ctx *c)
> > > > >   v = read_file_integer(SYN_LINEAR_TIMEOUTS, SYN_LINEAR_TIMEOUTS_DEFAULT);
> > > > >   c->tcp.syn_linear_timeouts = MIN(v, MAX_SYNCNT);
> > > > >
> > > > > + v = read_file_integer(RTO_MAX_MS, RTO_MAX_MS_DEFAULT);
> > > > > + c->tcp.rto_max = MIN(DIV_ROUND_CLOSEST(v, 1000), INT_MAX);
> > > >
> > > > Possibly we should verify this is => RTO_INIT.
> > >
> > > As a sanity check, maybe, but I don't see any harmful effect if it's
> > > < RTO_INIT, right? So I'm not sure if we should. No preference from my
> > > side really.
> >
> > Sorry, describing this as >= RTO_INIT was misleading.  What I'm
> > concerned about here is if the kernel value is set to 400ms, we'll
> > round it to... 0s.
> >
> > So, really what I'm concerned about is that we ensure this is > 0.
> 
> That's a good point.

Actually, thinking about it, I wonder if makes more sense to always
round up to 1s, rather than to the nearest 1s.


> > > > > +
> > > > >   debug("Read sysctl values syn_retries: %"PRIu8
> > > > > -       ", syn_linear_timeouts: %"PRIu8,
> > > > > +       ", syn_linear_timeouts: %"PRIu8
> > > > > +       ", rto_max: %d",
> > > > >         c->tcp.syn_retries,
> > > > > -       c->tcp.syn_linear_timeouts);
> > > > > +       c->tcp.syn_linear_timeouts,
> > > > > +       c->tcp.rto_max);
> > > > >  }
> > > > >
> > > > >  /**
> > > > > diff --git a/tcp.h b/tcp.h
> > > > > index 37d7758..c4945c3 100644
> > > > > --- a/tcp.h
> > > > > +++ b/tcp.h
> > > > > @@ -60,6 +60,7 @@ union tcp_listen_epoll_ref {
> > > > >   * @fwd_out:             Port forwarding configuration for outbound packets
> > > > >   * @timer_run:           Timestamp of most recent timer run
> > > > >   * @pipe_size:           Size of pipes for spliced connections
> > > > > + * @rto_max:             Maximal retry timeout (in s)
> > >
> > > As pointed out earlier, I think "maximum" is slightly more appropriate
> > > here.
> >
> > Agreed, with nuance discussed earlier.
> 
> Well, I must have misunderstood something in last week's meeting. I
> joined the second half meeting a few mins later and I caught Stefano
> saying he was wrong about maximal. Probably he was talking about the
> French meaning. Anyway, I will update.

Sorry, yeah, the discussion there was probably fairly confusing - I
think both Stefano and I got a bit distracted from the question at
hand by language-nerding.

The upshot is that while I think both "maximal" and "maximum" would be
correct, I think "maximum" is slightly clearer.

> >
> > >
> > > > >   * @syn_retries: SYN retries using exponential backoff timeout
> > > > >   * @syn_linear_timeouts: SYN retries before using exponential backoff timeout
> > > > >   */
> > > > > @@ -68,6 +69,7 @@ struct tcp_ctx {
> > > > >   struct fwd_ports fwd_out;
> > > > >   struct timespec timer_run;
> > > > >   size_t pipe_size;
> > > > > + int rto_max;
> > > > >   uint8_t syn_retries;
> > > > >   uint8_t syn_linear_timeouts;
> > > > >  };
> > > > > diff --git a/tcp_conn.h b/tcp_conn.h
> > > > > index 923af36..e36910c 100644
> > > > > --- a/tcp_conn.h
> > > > > +++ b/tcp_conn.h
> > > > > @@ -77,6 +77,7 @@ struct tcp_tap_conn {
> > > > >  #define ACK_TO_TAP_DUE           BIT(3)
> > > > >  #define ACK_FROM_TAP_DUE BIT(4)
> > > > >  #define ACK_FROM_TAP_BLOCKS      BIT(5)
> > > > > +#define SYN_RETRIED              BIT(6)
> > > > >
> > > > >  #define SNDBUF_BITS              24
> > > > >   unsigned int    sndbuf          :SNDBUF_BITS;
> > > > > --
> > > > > 2.51.0
> > >
> > > --
> > > Stefano
> > >
> >
> > --
> > David Gibson (he or they)       | I'll have my music baroque, and my code
> > david AT gibson.dropbear.id.au  | minimalist, thank you, not the other way
> >                                 | around.
> > http://www.ozlabs.org/~dgibson
> 
> 
> 
> -- 
> Thanks,
> 
> Yumei Huang
> 

-- 
David Gibson (he or they)	| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you, not the other way
				| around.
http://www.ozlabs.org/~dgibson

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH v8 2/6] util: Introduce read_file() and read_file_integer() function
  2025-11-14  1:58     ` Yumei Huang
@ 2025-11-14  4:32       ` David Gibson
  2025-11-14 10:12       ` Stefano Brivio
  1 sibling, 0 replies; 32+ messages in thread
From: David Gibson @ 2025-11-14  4:32 UTC (permalink / raw)
  To: Yumei Huang; +Cc: Stefano Brivio, passt-dev

[-- Attachment #1: Type: text/plain, Size: 2817 bytes --]

On Fri, Nov 14, 2025 at 09:58:57AM +0800, Yumei Huang wrote:
> On Fri, Nov 14, 2025 at 8:01 AM Stefano Brivio <sbrivio@redhat.com> wrote:
> >
> > On Mon, 10 Nov 2025 17:31:33 +0800
> > Yumei Huang <yuhuang@redhat.com> wrote:
> >
> > > Signed-off-by: Yumei Huang <yuhuang@redhat.com>
> > > Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
> > > ---
> > >  util.c | 86 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
> > >  util.h |  2 ++
> > >  2 files changed, 88 insertions(+)
> > >
> > > diff --git a/util.c b/util.c
> > > index 44c21a3..c4c849c 100644
> > > --- a/util.c
> > > +++ b/util.c
> > > @@ -590,6 +590,92 @@ int write_file(const char *path, const char *buf)
> > >       return len == 0 ? 0 : -1;
> > >  }
> > >
> > > +/**
> > > + * read_file() - Read contents of file into a NULL-terminated buffer
> > > + * @path:    Path to file to read
> > > + * @buf:     Buffer to store file contents
> > > + * @buf_size:        Size of buffer
> > > + *
> > > + * Return: number of bytes read on success, -1 on error, -ENOBUFS on truncation
> > > + */
> > > +ssize_t read_file(const char *path, char *buf, size_t buf_size)
> > > +{
> > > +     int fd = open(path, O_RDONLY | O_CLOEXEC);
> > > +     size_t total_read = 0;
> > > +     ssize_t rc;
> > > +
> > > +     if (fd < 0) {
> > > +             warn_perror("Could not open %s", path);
> > > +             return -1;
> > > +     }
> > > +
> > > +     while (total_read < buf_size) {
> > > +             rc = read(fd, buf + total_read, buf_size - total_read);
> >
> > cppcheck rightfully says that:
> >
> > util.c:604:10: style: The scope of the variable 'rc' can be reduced. [variableScope]
> >  ssize_t rc;
> >          ^
> 
> Right.
> Seems it also says:
> 
> tcp.c:2814:0: style: The function 'tcp_get_rto_params' should have
> static linkage since it is not used outside of its translation unit.
> [staticFunction]
> void tcp_get_rto_params(struct ctx *c)
> ^

Ah, yes, that should be 'static'.  Sorry, missed that in earlier reviews.

> util.c:601:0: style: The function 'read_file' should have static
> linkage since it is not used outside of its translation unit.
> [staticFunction]
> ssize_t read_file(const char *path, char *buf, size_t buf_size)
> 
> I understand read_file() may be called from other places in the
> future. But do we need to add static now? I guess we need it for
> tcp_get_rto_params().

It's true that read_file() might be used elsewhere in future.  I'd
still make it 'static' for now, and we can change that if/when we use
it somewhere else.

-- 
David Gibson (he or they)	| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you, not the other way
				| around.
http://www.ozlabs.org/~dgibson

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH v8 2/6] util: Introduce read_file() and read_file_integer() function
  2025-11-14  1:58     ` Yumei Huang
  2025-11-14  4:32       ` David Gibson
@ 2025-11-14 10:12       ` Stefano Brivio
  2025-11-17  1:10         ` Yumei Huang
  1 sibling, 1 reply; 32+ messages in thread
From: Stefano Brivio @ 2025-11-14 10:12 UTC (permalink / raw)
  To: Yumei Huang; +Cc: passt-dev, david

On Fri, 14 Nov 2025 09:58:57 +0800
Yumei Huang <yuhuang@redhat.com> wrote:

> On Fri, Nov 14, 2025 at 8:01 AM Stefano Brivio <sbrivio@redhat.com> wrote:
> >
> > On Mon, 10 Nov 2025 17:31:33 +0800
> > Yumei Huang <yuhuang@redhat.com> wrote:
> >  
> > > Signed-off-by: Yumei Huang <yuhuang@redhat.com>
> > > Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
> > > ---
> > >  util.c | 86 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
> > >  util.h |  2 ++
> > >  2 files changed, 88 insertions(+)
> > >
> > > diff --git a/util.c b/util.c
> > > index 44c21a3..c4c849c 100644
> > > --- a/util.c
> > > +++ b/util.c
> > > @@ -590,6 +590,92 @@ int write_file(const char *path, const char *buf)
> > >       return len == 0 ? 0 : -1;
> > >  }
> > >
> > > +/**
> > > + * read_file() - Read contents of file into a NULL-terminated buffer
> > > + * @path:    Path to file to read
> > > + * @buf:     Buffer to store file contents
> > > + * @buf_size:        Size of buffer
> > > + *
> > > + * Return: number of bytes read on success, -1 on error, -ENOBUFS on truncation
> > > + */
> > > +ssize_t read_file(const char *path, char *buf, size_t buf_size)
> > > +{
> > > +     int fd = open(path, O_RDONLY | O_CLOEXEC);
> > > +     size_t total_read = 0;
> > > +     ssize_t rc;
> > > +
> > > +     if (fd < 0) {
> > > +             warn_perror("Could not open %s", path);
> > > +             return -1;
> > > +     }
> > > +
> > > +     while (total_read < buf_size) {
> > > +             rc = read(fd, buf + total_read, buf_size - total_read);  
> >
> > cppcheck rightfully says that:
> >
> > util.c:604:10: style: The scope of the variable 'rc' can be reduced. [variableScope]
> >  ssize_t rc;
> >          ^  
> 
> Right.
> Seems it also says:
> 
> tcp.c:2814:0: style: The function 'tcp_get_rto_params' should have
> static linkage since it is not used outside of its translation unit.
> [staticFunction]
> void tcp_get_rto_params(struct ctx *c)
> ^
> util.c:601:0: style: The function 'read_file' should have static
> linkage since it is not used outside of its translation unit.
> [staticFunction]
> ssize_t read_file(const char *path, char *buf, size_t buf_size)

Oops, my current version (2.16.0) doesn't say that, I should upgrade it
(but I typically try to remain a few versions behind as David usually
upgrades right away, so that we catch also differences).

> I understand read_file() may be called from other places in the
> future. But do we need to add static now? I guess we need it for
> tcp_get_rto_params().

On top of what David said, I'm not sure if we'll ever need it in
tcp_get_rto_params(): I guess we'll always want to read integers there.

By the way, don't forget to drop the prototype from util.h as it's not
needed if you make it static.

> > > +
> > > +             if (rc < 0) {
> > > +                     warn_perror("Couldn't read from %s", path);
> > > +                     close(fd);
> > > +                     return -1;
> > > +             }
> > > +
> > > +             if (rc == 0)
> > > +                     break;
> > > +
> > > +             total_read += rc;
> > > +     }
> > > +
> > > +     close(fd);
> > > +
> > > +     if (total_read == buf_size) {
> > > +             warn("File %s contents exceed buffer size %zu", path,
> > > +                  buf_size);
> > > +             buf[buf_size - 1] = '\0';  
> >
> > I suggested we need this, but Coverity Scan points out that:
> >
> > ---
> > /home/sbrivio/passt/util.c:631:3:
> >   Type: Overflowed constant (INTEGER_OVERFLOW)
> >
> > /home/sbrivio/passt/util.c:606:2:
> >   1. path: Condition "fd < 0", taking false branch.
> > /home/sbrivio/passt/util.c:611:2:
> >   2. path: Condition "total_read < buf_size", taking false branch.
> > /home/sbrivio/passt/util.c:628:2:
> >   3. path: Condition "total_read == buf_size", taking true branch.
> > /home/sbrivio/passt/util.c:631:3:
> >   4. overflow_const: Expression "buf_size - 1UL", where "buf_size" is known to be equal to 0, underflows the type of "buf_size - 1UL", which is type "unsigned long".
> > ---  
> 
> Somehow my Coverity Scan didn't complain about that.

Did you forget to pass '--security' to cov-analyze perhaps?

> > in the (faulty) case where somebody calls this with 0 as buf_size.
> >
> > On the other hand, the passed value of buf_size might be a result of a
> > wrong calculation, and in that case we don't want to write some
> > unrelated value on the stack of the caller or smash the stack.
> >
> > We could ASSERT(buf_size), but in the future we might abuse read_file()
> > to just check that a file is there and can be read, instead of actually
> > reading it.
> >
> > So maybe we could just return (after closing fd) before read() on
> > !buf_size?  
> 
> Sure. I can add that.
> >  
> > > +             return -ENOBUFS;
> > > +     }
> > > +
> > > +     buf[total_read] = '\0';
> > > +
> > > +     return total_read;
> > > +}
> > > +
> > > +/**
> > > + * read_file_integer() - Read an integer value from a file
> > > + * @path:    Path to file to read
> > > + * @fallback:        Default value if file can't be read
> > > + *
> > > + * Return: integer value, @fallback on failure
> > > + */
> > > +intmax_t read_file_integer(const char *path, intmax_t fallback)
> > > +{
> > > +     ssize_t bytes_read;
> > > +     char buf[BUFSIZ];
> > > +     intmax_t value;
> > > +     char *end;
> > > +
> > > +     bytes_read = read_file(path, buf, sizeof(buf));
> > > +
> > > +     if (bytes_read < 0)
> > > +             return fallback;
> > > +
> > > +     if (bytes_read == 0) {
> > > +             debug("Empty file %s", path);
> > > +             return fallback;
> > > +     }
> > > +
> > > +     errno = 0;
> > > +     value = strtoimax(buf, &end, 10);
> > > +     if (*end && *end != '\n') {
> > > +             debug("Non-numeric content in %s", path);
> > > +             return fallback;
> > > +     }
> > > +     if (errno) {
> > > +             debug("Out of range value in %s: %s", path, buf);
> > > +             return fallback;
> > > +     }
> > > +
> > > +     return value;
> > > +}
> > > +
> > >  #ifdef __ia64__
> > >  /* Needed by do_clone() below: glibc doesn't export the prototype of __clone2(),
> > >   * use the description from clone(2).
> > > diff --git a/util.h b/util.h
> > > index a0b2ada..c1502cc 100644
> > > --- a/util.h
> > > +++ b/util.h
> > > @@ -229,6 +229,8 @@ void pidfile_write(int fd, pid_t pid);
> > >  int __daemon(int pidfile_fd, int devnull_fd);
> > >  int fls(unsigned long x);
> > >  int write_file(const char *path, const char *buf);
> > > +ssize_t read_file(const char *path, char *buf, size_t buf_size);
> > > +intmax_t read_file_integer(const char *path, intmax_t fallback);
> > >  int write_all_buf(int fd, const void *buf, size_t len);
> > >  int write_remainder(int fd, const struct iovec *iov, size_t iovcnt, size_t skip);
> > >  int read_all_buf(int fd, void *buf, size_t len);  

-- 
Stefano


^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH v8 2/6] util: Introduce read_file() and read_file_integer() function
  2025-11-14 10:12       ` Stefano Brivio
@ 2025-11-17  1:10         ` Yumei Huang
  2025-11-18  0:19           ` Stefano Brivio
  0 siblings, 1 reply; 32+ messages in thread
From: Yumei Huang @ 2025-11-17  1:10 UTC (permalink / raw)
  To: Stefano Brivio; +Cc: passt-dev, david

On Fri, Nov 14, 2025 at 6:12 PM Stefano Brivio <sbrivio@redhat.com> wrote:
>
> On Fri, 14 Nov 2025 09:58:57 +0800
> Yumei Huang <yuhuang@redhat.com> wrote:
>
> > On Fri, Nov 14, 2025 at 8:01 AM Stefano Brivio <sbrivio@redhat.com> wrote:
> > >
> > > On Mon, 10 Nov 2025 17:31:33 +0800
> > > Yumei Huang <yuhuang@redhat.com> wrote:
> > >
> > > > Signed-off-by: Yumei Huang <yuhuang@redhat.com>
> > > > Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
> > > > ---
> > > >  util.c | 86 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
> > > >  util.h |  2 ++
> > > >  2 files changed, 88 insertions(+)
> > > >
> > > > diff --git a/util.c b/util.c
> > > > index 44c21a3..c4c849c 100644
> > > > --- a/util.c
> > > > +++ b/util.c
> > > > @@ -590,6 +590,92 @@ int write_file(const char *path, const char *buf)
> > > >       return len == 0 ? 0 : -1;
> > > >  }
> > > >
> > > > +/**
> > > > + * read_file() - Read contents of file into a NULL-terminated buffer
> > > > + * @path:    Path to file to read
> > > > + * @buf:     Buffer to store file contents
> > > > + * @buf_size:        Size of buffer
> > > > + *
> > > > + * Return: number of bytes read on success, -1 on error, -ENOBUFS on truncation
> > > > + */
> > > > +ssize_t read_file(const char *path, char *buf, size_t buf_size)
> > > > +{
> > > > +     int fd = open(path, O_RDONLY | O_CLOEXEC);
> > > > +     size_t total_read = 0;
> > > > +     ssize_t rc;
> > > > +
> > > > +     if (fd < 0) {
> > > > +             warn_perror("Could not open %s", path);
> > > > +             return -1;
> > > > +     }
> > > > +
> > > > +     while (total_read < buf_size) {
> > > > +             rc = read(fd, buf + total_read, buf_size - total_read);
> > >
> > > cppcheck rightfully says that:
> > >
> > > util.c:604:10: style: The scope of the variable 'rc' can be reduced. [variableScope]
> > >  ssize_t rc;
> > >          ^
> >
> > Right.
> > Seems it also says:
> >
> > tcp.c:2814:0: style: The function 'tcp_get_rto_params' should have
> > static linkage since it is not used outside of its translation unit.
> > [staticFunction]
> > void tcp_get_rto_params(struct ctx *c)
> > ^
> > util.c:601:0: style: The function 'read_file' should have static
> > linkage since it is not used outside of its translation unit.
> > [staticFunction]
> > ssize_t read_file(const char *path, char *buf, size_t buf_size)
>
> Oops, my current version (2.16.0) doesn't say that, I should upgrade it
> (but I typically try to remain a few versions behind as David usually
> upgrades right away, so that we catch also differences).
>
> > I understand read_file() may be called from other places in the
> > future. But do we need to add static now? I guess we need it for
> > tcp_get_rto_params().
>
> On top of what David said, I'm not sure if we'll ever need it in
> tcp_get_rto_params(): I guess we'll always want to read integers there.

I meant we need to add static for tcp_get_rto_params(). But I got your
point. I will add static for both the functions.
>
> By the way, don't forget to drop the prototype from util.h as it's not
> needed if you make it static.

Thanks for the reminder!
>
> > > > +
> > > > +             if (rc < 0) {
> > > > +                     warn_perror("Couldn't read from %s", path);
> > > > +                     close(fd);
> > > > +                     return -1;
> > > > +             }
> > > > +
> > > > +             if (rc == 0)
> > > > +                     break;
> > > > +
> > > > +             total_read += rc;
> > > > +     }
> > > > +
> > > > +     close(fd);
> > > > +
> > > > +     if (total_read == buf_size) {
> > > > +             warn("File %s contents exceed buffer size %zu", path,
> > > > +                  buf_size);
> > > > +             buf[buf_size - 1] = '\0';
> > >
> > > I suggested we need this, but Coverity Scan points out that:
> > >
> > > ---
> > > /home/sbrivio/passt/util.c:631:3:
> > >   Type: Overflowed constant (INTEGER_OVERFLOW)
> > >
> > > /home/sbrivio/passt/util.c:606:2:
> > >   1. path: Condition "fd < 0", taking false branch.
> > > /home/sbrivio/passt/util.c:611:2:
> > >   2. path: Condition "total_read < buf_size", taking false branch.
> > > /home/sbrivio/passt/util.c:628:2:
> > >   3. path: Condition "total_read == buf_size", taking true branch.
> > > /home/sbrivio/passt/util.c:631:3:
> > >   4. overflow_const: Expression "buf_size - 1UL", where "buf_size" is known to be equal to 0, underflows the type of "buf_size - 1UL", which is type "unsigned long".
> > > ---
> >
> > Somehow my Coverity Scan didn't complain about that.
>
> Did you forget to pass '--security' to cov-analyze perhaps?

Yep, sorry.
>
> > > in the (faulty) case where somebody calls this with 0 as buf_size.
> > >
> > > On the other hand, the passed value of buf_size might be a result of a
> > > wrong calculation, and in that case we don't want to write some
> > > unrelated value on the stack of the caller or smash the stack.
> > >
> > > We could ASSERT(buf_size), but in the future we might abuse read_file()
> > > to just check that a file is there and can be read, instead of actually
> > > reading it.
> > >
> > > So maybe we could just return (after closing fd) before read() on
> > > !buf_size?
> >
> > Sure. I can add that.
> > >
> > > > +             return -ENOBUFS;
> > > > +     }
> > > > +
> > > > +     buf[total_read] = '\0';
> > > > +
> > > > +     return total_read;
> > > > +}
> > > > +
> > > > +/**
> > > > + * read_file_integer() - Read an integer value from a file
> > > > + * @path:    Path to file to read
> > > > + * @fallback:        Default value if file can't be read
> > > > + *
> > > > + * Return: integer value, @fallback on failure
> > > > + */
> > > > +intmax_t read_file_integer(const char *path, intmax_t fallback)
> > > > +{
> > > > +     ssize_t bytes_read;
> > > > +     char buf[BUFSIZ];
> > > > +     intmax_t value;
> > > > +     char *end;
> > > > +
> > > > +     bytes_read = read_file(path, buf, sizeof(buf));
> > > > +
> > > > +     if (bytes_read < 0)
> > > > +             return fallback;
> > > > +
> > > > +     if (bytes_read == 0) {
> > > > +             debug("Empty file %s", path);
> > > > +             return fallback;
> > > > +     }
> > > > +
> > > > +     errno = 0;
> > > > +     value = strtoimax(buf, &end, 10);
> > > > +     if (*end && *end != '\n') {
> > > > +             debug("Non-numeric content in %s", path);
> > > > +             return fallback;
> > > > +     }
> > > > +     if (errno) {
> > > > +             debug("Out of range value in %s: %s", path, buf);
> > > > +             return fallback;
> > > > +     }
> > > > +
> > > > +     return value;
> > > > +}
> > > > +
> > > >  #ifdef __ia64__
> > > >  /* Needed by do_clone() below: glibc doesn't export the prototype of __clone2(),
> > > >   * use the description from clone(2).
> > > > diff --git a/util.h b/util.h
> > > > index a0b2ada..c1502cc 100644
> > > > --- a/util.h
> > > > +++ b/util.h
> > > > @@ -229,6 +229,8 @@ void pidfile_write(int fd, pid_t pid);
> > > >  int __daemon(int pidfile_fd, int devnull_fd);
> > > >  int fls(unsigned long x);
> > > >  int write_file(const char *path, const char *buf);
> > > > +ssize_t read_file(const char *path, char *buf, size_t buf_size);
> > > > +intmax_t read_file_integer(const char *path, intmax_t fallback);
> > > >  int write_all_buf(int fd, const void *buf, size_t len);
> > > >  int write_remainder(int fd, const struct iovec *iov, size_t iovcnt, size_t skip);
> > > >  int read_all_buf(int fd, void *buf, size_t len);
>
> --
> Stefano
>


-- 
Thanks,

Yumei Huang


^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH v8 6/6] tcp: Clamp the retry timeout
  2025-11-14  3:35           ` David Gibson
@ 2025-11-17  2:38             ` Yumei Huang
  2025-11-17  4:50               ` David Gibson
  2025-11-18  0:19             ` Stefano Brivio
  1 sibling, 1 reply; 32+ messages in thread
From: Yumei Huang @ 2025-11-17  2:38 UTC (permalink / raw)
  To: David Gibson; +Cc: Stefano Brivio, passt-dev

On Fri, Nov 14, 2025 at 12:19 PM David Gibson
<david@gibson.dropbear.id.au> wrote:
>
> On Fri, Nov 14, 2025 at 11:05:51AM +0800, Yumei Huang wrote:
> > On Fri, Nov 14, 2025 at 8:47 AM David Gibson
> > <david@gibson.dropbear.id.au> wrote:
> > >
> > > On Fri, Nov 14, 2025 at 01:01:21AM +0100, Stefano Brivio wrote:
> > > > On Mon, 10 Nov 2025 21:56:39 +1100
> > > > David Gibson <david@gibson.dropbear.id.au> wrote:
> > > >
> > > > > On Mon, Nov 10, 2025 at 05:31:37PM +0800, Yumei Huang wrote:
> > > > > > Clamp the TCP retry timeout as Linux kernel does. If a retry occurs
> > > > > > during the handshake and the RTO is below 3 seconds, re-initialise
> > > > > > it to 3 seconds for data retransmissions according to RFC 6298.
> > > > > >
> > > > > > Suggested-by: Stefano Brivio <sbrivio@redhat.com>
> > > > > > Signed-off-by: Yumei Huang <yuhuang@redhat.com>
> > > > >
> > > > > Looks correct, so
> > > > >
> > > > > Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
> > > > >
> > > > > A few possible improvements (mostly comments/names) below.
> > > > >
> > > > > > ---
> > > > > >  tcp.c      | 25 ++++++++++++++++++++-----
> > > > > >  tcp.h      |  2 ++
> > > > > >  tcp_conn.h |  1 +
> > > > > >  3 files changed, 23 insertions(+), 5 deletions(-)
> > > > > >
> > > > > > diff --git a/tcp.c b/tcp.c
> > > > > > index ee111e0..b015658 100644
> > > > > > --- a/tcp.c
> > > > > > +++ b/tcp.c
> > > > > > @@ -187,6 +187,9 @@
> > > > > >   *   for established connections, or (syn_retries + syn_linear_timeouts) times
> > > > > >   *   during the handshake, reset the connection
> > > > > >   *
> > > > > > + * - RTO_INIT_ACK: if the RTO is less than this, re-initialise RTO to this for
> > > > > > + *   data retransmissions
> > > > > > + *
> > > > >
> > > > > Now that this is conditional on SYN being retried, the description
> > > > > doesn't look entirely accurate any more.
> > > >
> > > > If I read the whole thing again I would actually say it becomes
> > > > misleading and worth fixing before applying this. Also because the
> > > > relationship to the text in the RFC is not obvious anymore.
> > > >
> > > > This should be "if SYN retries happened during handshake, ...".
> > > >
> > > > > >   * - FIN_TIMEOUT: if a FIN segment was sent to tap/guest (flag ACK_FROM_TAP_DUE
> > > > > >   *   with TAP_FIN_SENT event), and no ACK is received within this time, reset
> > > > > >   *   the connection
> > > > > > @@ -340,6 +343,7 @@ enum {
> > > > > >
> > > > > >  #define ACK_INTERVAL                     10              /* ms */
> > > > > >  #define RTO_INIT                 1               /* s, RFC 6298 */
> > > > > > +#define RTO_INIT_ACK                     3               /* s, RFC 6298 */
> > > > >
> > > > > The relevance of "_ACK" in the name is not obvious to me.
> > > >
> > > > What about RTO_INIT_AFTER_SYN_RETRIES? We use it only once, and it
> > > > looks fine there.
> > >
> > > Works for me.
> > >
> > > > > >  #define FIN_TIMEOUT                      60
> > > > > >  #define ACT_TIMEOUT                      7200
> > > > > >
> > > > > -> @@ -365,9 +369,11 @@ uint8_t tcp_migrate_rcv_queue               [TCP_MIGRATE_RCV_QUEUE_MAX];
> > > > > >
> > > > > >  #define SYN_RETRIES              "/proc/sys/net/ipv4/tcp_syn_retries"
> > > > > >  #define SYN_LINEAR_TIMEOUTS      "/proc/sys/net/ipv4/tcp_syn_linear_timeouts"
> > > > > > +#define RTO_MAX_MS               "/proc/sys/net/ipv4/tcp_rto_max_ms"
> > > > > >
> > > > > >  #define SYN_RETRIES_DEFAULT              6
> > > > > >  #define SYN_LINEAR_TIMEOUTS_DEFAULT      4
> > > > > > +#define RTO_MAX_MS_DEFAULT               120000
> > > > > >  #define MAX_SYNCNT                       127 /* derived from kernel's limit */
> > > > > >
> > > > > >  /* "Extended" data (not stored in the flow table) for TCP flow migration */
> > > > > > @@ -392,7 +398,7 @@ static const char *tcp_state_str[] __attribute((__unused__)) = {
> > > > > >
> > > > > >  static const char *tcp_flag_str[] __attribute((__unused__)) = {
> > > > > >   "STALLED", "LOCAL", "ACTIVE_CLOSE", "ACK_TO_TAP_DUE",
> > > > > > - "ACK_FROM_TAP_DUE", "ACK_FROM_TAP_BLOCKS",
> > > > > > + "ACK_FROM_TAP_DUE", "ACK_FROM_TAP_BLOCKS", "SYN_RETRIED",
> > > > > >  };
> > > > > >
> > > > > >  /* Listening sockets, used for automatic port forwarding in pasta mode only */
> > > > > > @@ -590,10 +596,13 @@ static void tcp_timer_ctl(const struct ctx *c, struct tcp_tap_conn *conn)
> > > > > >   if (conn->flags & ACK_TO_TAP_DUE) {
> > > > > >           it.it_value.tv_nsec = (long)ACK_INTERVAL * 1000 * 1000;
> > > > > >   } else if (conn->flags & ACK_FROM_TAP_DUE) {
> > > > > > -         int exp = conn->retries;
> > > > > > +         int exp = conn->retries, timeout = RTO_INIT;
> > > > > >           if (!(conn->events & ESTABLISHED))
> > > > > >                   exp -= c->tcp.syn_linear_timeouts;
> > > > > > -         it.it_value.tv_sec = RTO_INIT << MAX(exp, 0);
> > > > > > +         else if (conn->flags & SYN_RETRIED)
> > > > > > +                 timeout = MAX(timeout, RTO_INIT_ACK);
> > > > > > +         timeout <<= MAX(exp, 0);
> > > > > > +         it.it_value.tv_sec = MIN(timeout, c->tcp.rto_max);
> > > > > >   } else if (CONN_HAS(conn, SOCK_FIN_SENT | TAP_FIN_ACKED)) {
> > > > > >           it.it_value.tv_sec = FIN_TIMEOUT;
> > > > > >   } else {
> > > > > > @@ -2440,6 +2449,7 @@ void tcp_timer_handler(const struct ctx *c, union epoll_ref ref)
> > > > > >                           flow_trace(conn, "SYN timeout, retry");
> > > > > >                           tcp_send_flag(c, conn, SYN);
> > > > > >                           conn->retries++;
> > > > > > +                         conn_flag(c, conn, SYN_RETRIED);
> > > > > >                           tcp_timer_ctl(c, conn);
> > > > > >                   }
> > > > > >           } else if (CONN_HAS(conn, SOCK_FIN_SENT | TAP_FIN_ACKED)) {
> > > > > > @@ -2811,10 +2821,15 @@ void tcp_get_rto_params(struct ctx *c)
> > > > > >   v = read_file_integer(SYN_LINEAR_TIMEOUTS, SYN_LINEAR_TIMEOUTS_DEFAULT);
> > > > > >   c->tcp.syn_linear_timeouts = MIN(v, MAX_SYNCNT);
> > > > > >
> > > > > > + v = read_file_integer(RTO_MAX_MS, RTO_MAX_MS_DEFAULT);
> > > > > > + c->tcp.rto_max = MIN(DIV_ROUND_CLOSEST(v, 1000), INT_MAX);
> > > > >
> > > > > Possibly we should verify this is => RTO_INIT.
> > > >
> > > > As a sanity check, maybe, but I don't see any harmful effect if it's
> > > > < RTO_INIT, right? So I'm not sure if we should. No preference from my
> > > > side really.
> > >
> > > Sorry, describing this as >= RTO_INIT was misleading.  What I'm
> > > concerned about here is if the kernel value is set to 400ms, we'll
> > > round it to... 0s.
> > >
> > > So, really what I'm concerned about is that we ensure this is > 0.
> >
> > That's a good point.
>
> Actually, thinking about it, I wonder if makes more sense to always
> round up to 1s, rather than to the nearest 1s.

So it's to replace DIV_ROUND_CLOSEST with DIV_ROUND_UP, right?

>
>
> > > > > > +
> > > > > >   debug("Read sysctl values syn_retries: %"PRIu8
> > > > > > -       ", syn_linear_timeouts: %"PRIu8,
> > > > > > +       ", syn_linear_timeouts: %"PRIu8
> > > > > > +       ", rto_max: %d",
> > > > > >         c->tcp.syn_retries,
> > > > > > -       c->tcp.syn_linear_timeouts);
> > > > > > +       c->tcp.syn_linear_timeouts,
> > > > > > +       c->tcp.rto_max);
> > > > > >  }
> > > > > >
> > > > > >  /**
> > > > > > diff --git a/tcp.h b/tcp.h
> > > > > > index 37d7758..c4945c3 100644
> > > > > > --- a/tcp.h
> > > > > > +++ b/tcp.h
> > > > > > @@ -60,6 +60,7 @@ union tcp_listen_epoll_ref {
> > > > > >   * @fwd_out:             Port forwarding configuration for outbound packets
> > > > > >   * @timer_run:           Timestamp of most recent timer run
> > > > > >   * @pipe_size:           Size of pipes for spliced connections
> > > > > > + * @rto_max:             Maximal retry timeout (in s)
> > > >
> > > > As pointed out earlier, I think "maximum" is slightly more appropriate
> > > > here.
> > >
> > > Agreed, with nuance discussed earlier.
> >
> > Well, I must have misunderstood something in last week's meeting. I
> > joined the second half meeting a few mins later and I caught Stefano
> > saying he was wrong about maximal. Probably he was talking about the
> > French meaning. Anyway, I will update.
>
> Sorry, yeah, the discussion there was probably fairly confusing - I
> think both Stefano and I got a bit distracted from the question at
> hand by language-nerding.
>
> The upshot is that while I think both "maximal" and "maximum" would be
> correct, I think "maximum" is slightly clearer.

Got it.
>
> > >
> > > >
> > > > > >   * @syn_retries: SYN retries using exponential backoff timeout
> > > > > >   * @syn_linear_timeouts: SYN retries before using exponential backoff timeout
> > > > > >   */
> > > > > > @@ -68,6 +69,7 @@ struct tcp_ctx {
> > > > > >   struct fwd_ports fwd_out;
> > > > > >   struct timespec timer_run;
> > > > > >   size_t pipe_size;
> > > > > > + int rto_max;
> > > > > >   uint8_t syn_retries;
> > > > > >   uint8_t syn_linear_timeouts;
> > > > > >  };
> > > > > > diff --git a/tcp_conn.h b/tcp_conn.h
> > > > > > index 923af36..e36910c 100644
> > > > > > --- a/tcp_conn.h
> > > > > > +++ b/tcp_conn.h
> > > > > > @@ -77,6 +77,7 @@ struct tcp_tap_conn {
> > > > > >  #define ACK_TO_TAP_DUE           BIT(3)
> > > > > >  #define ACK_FROM_TAP_DUE BIT(4)
> > > > > >  #define ACK_FROM_TAP_BLOCKS      BIT(5)
> > > > > > +#define SYN_RETRIED              BIT(6)
> > > > > >
> > > > > >  #define SNDBUF_BITS              24
> > > > > >   unsigned int    sndbuf          :SNDBUF_BITS;
> > > > > > --
> > > > > > 2.51.0
> > > >
> > > > --
> > > > Stefano
> > > >
> > >
> > > --
> > > David Gibson (he or they)       | I'll have my music baroque, and my code
> > > david AT gibson.dropbear.id.au  | minimalist, thank you, not the other way
> > >                                 | around.
> > > http://www.ozlabs.org/~dgibson
> >
> >
> >
> > --
> > Thanks,
> >
> > Yumei Huang
> >
>
> --
> David Gibson (he or they)       | I'll have my music baroque, and my code
> david AT gibson.dropbear.id.au  | minimalist, thank you, not the other way
>                                 | around.
> http://www.ozlabs.org/~dgibson



-- 
Thanks,

Yumei Huang


^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH v8 6/6] tcp: Clamp the retry timeout
  2025-11-17  2:38             ` Yumei Huang
@ 2025-11-17  4:50               ` David Gibson
  0 siblings, 0 replies; 32+ messages in thread
From: David Gibson @ 2025-11-17  4:50 UTC (permalink / raw)
  To: Yumei Huang; +Cc: Stefano Brivio, passt-dev

[-- Attachment #1: Type: text/plain, Size: 1709 bytes --]

On Mon, Nov 17, 2025 at 10:38:52AM +0800, Yumei Huang wrote:
> On Fri, Nov 14, 2025 at 12:19 PM David Gibson
> <david@gibson.dropbear.id.au> wrote:
> >
> > On Fri, Nov 14, 2025 at 11:05:51AM +0800, Yumei Huang wrote:
> > > On Fri, Nov 14, 2025 at 8:47 AM David Gibson
> > > <david@gibson.dropbear.id.au> wrote:
[snip]
> > > > > > > @@ -2811,10 +2821,15 @@ void tcp_get_rto_params(struct ctx *c)
> > > > > > >   v = read_file_integer(SYN_LINEAR_TIMEOUTS, SYN_LINEAR_TIMEOUTS_DEFAULT);
> > > > > > >   c->tcp.syn_linear_timeouts = MIN(v, MAX_SYNCNT);
> > > > > > >
> > > > > > > + v = read_file_integer(RTO_MAX_MS, RTO_MAX_MS_DEFAULT);
> > > > > > > + c->tcp.rto_max = MIN(DIV_ROUND_CLOSEST(v, 1000), INT_MAX);
> > > > > >
> > > > > > Possibly we should verify this is => RTO_INIT.
> > > > >
> > > > > As a sanity check, maybe, but I don't see any harmful effect if it's
> > > > > < RTO_INIT, right? So I'm not sure if we should. No preference from my
> > > > > side really.
> > > >
> > > > Sorry, describing this as >= RTO_INIT was misleading.  What I'm
> > > > concerned about here is if the kernel value is set to 400ms, we'll
> > > > round it to... 0s.
> > > >
> > > > So, really what I'm concerned about is that we ensure this is > 0.
> > >
> > > That's a good point.
> >
> > Actually, thinking about it, I wonder if makes more sense to always
> > round up to 1s, rather than to the nearest 1s.
> 
> So it's to replace DIV_ROUND_CLOSEST with DIV_ROUND_UP, right?

Yes.

-- 
David Gibson (he or they)	| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you, not the other way
				| around.
http://www.ozlabs.org/~dgibson

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH v8 6/6] tcp: Clamp the retry timeout
  2025-11-14  3:35           ` David Gibson
  2025-11-17  2:38             ` Yumei Huang
@ 2025-11-18  0:19             ` Stefano Brivio
  1 sibling, 0 replies; 32+ messages in thread
From: Stefano Brivio @ 2025-11-18  0:19 UTC (permalink / raw)
  To: David Gibson; +Cc: Yumei Huang, passt-dev

On Fri, 14 Nov 2025 14:35:12 +1100
David Gibson <david@gibson.dropbear.id.au> wrote:

> On Fri, Nov 14, 2025 at 11:05:51AM +0800, Yumei Huang wrote:
> > On Fri, Nov 14, 2025 at 8:47 AM David Gibson
> > <david@gibson.dropbear.id.au> wrote:  
> > >
> > > On Fri, Nov 14, 2025 at 01:01:21AM +0100, Stefano Brivio wrote:  
> > > > On Mon, 10 Nov 2025 21:56:39 +1100
> > > > David Gibson <david@gibson.dropbear.id.au> wrote:
> > > >  
> > > > > On Mon, Nov 10, 2025 at 05:31:37PM +0800, Yumei Huang wrote:  
> > > > >
> > > > > > @@ -2811,10 +2821,15 @@ void tcp_get_rto_params(struct ctx *c)
> > > > > >   v = read_file_integer(SYN_LINEAR_TIMEOUTS, SYN_LINEAR_TIMEOUTS_DEFAULT);
> > > > > >   c->tcp.syn_linear_timeouts = MIN(v, MAX_SYNCNT);
> > > > > >
> > > > > > + v = read_file_integer(RTO_MAX_MS, RTO_MAX_MS_DEFAULT);
> > > > > > + c->tcp.rto_max = MIN(DIV_ROUND_CLOSEST(v, 1000), INT_MAX);  
> > > > >
> > > > > Possibly we should verify this is => RTO_INIT.  
> > > >
> > > > As a sanity check, maybe, but I don't see any harmful effect if it's
> > > > < RTO_INIT, right? So I'm not sure if we should. No preference from my
> > > > side really.  
> > >
> > > Sorry, describing this as >= RTO_INIT was misleading.  What I'm
> > > concerned about here is if the kernel value is set to 400ms, we'll
> > > round it to... 0s.
> > >
> > > So, really what I'm concerned about is that we ensure this is > 0.  
> > 
> > That's a good point.  
> 
> Actually, thinking about it, I wonder if makes more sense to always
> round up to 1s, rather than to the nearest 1s.

I guess so, yes.

Using two seconds as timeout when the user configured 1400ms isn't
necessarily less correct than using one second, and it simplifies things
(we already have DIV_ROUND_UP()).

-- 
Stefano


^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH v8 2/6] util: Introduce read_file() and read_file_integer() function
  2025-11-10  9:31 ` [PATCH v8 2/6] util: Introduce read_file() and read_file_integer() function Yumei Huang
  2025-11-14  0:01   ` Stefano Brivio
@ 2025-11-18  0:19   ` Stefano Brivio
  2025-11-18  3:22     ` David Gibson
  1 sibling, 1 reply; 32+ messages in thread
From: Stefano Brivio @ 2025-11-18  0:19 UTC (permalink / raw)
  To: Yumei Huang; +Cc: passt-dev, david

On Mon, 10 Nov 2025 17:31:33 +0800
Yumei Huang <yuhuang@redhat.com> wrote:

> Signed-off-by: Yumei Huang <yuhuang@redhat.com>
> Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
> ---
>  util.c | 86 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
>  util.h |  2 ++
>  2 files changed, 88 insertions(+)
> 
> diff --git a/util.c b/util.c
> index 44c21a3..c4c849c 100644
> --- a/util.c
> +++ b/util.c
> @@ -590,6 +590,92 @@ int write_file(const char *path, const char *buf)
>  	return len == 0 ? 0 : -1;
>  }
>  
> +/**
> + * read_file() - Read contents of file into a NULL-terminated buffer
> + * @path:	Path to file to read
> + * @buf:	Buffer to store file contents
> + * @buf_size:	Size of buffer
> + *
> + * Return: number of bytes read on success, -1 on error, -ENOBUFS on truncation
> + */
> +ssize_t read_file(const char *path, char *buf, size_t buf_size)
> +{
> +	int fd = open(path, O_RDONLY | O_CLOEXEC);
> +	size_t total_read = 0;
> +	ssize_t rc;
> +
> +	if (fd < 0) {
> +		warn_perror("Could not open %s", path);

While testing something unrelated I realised that this looks like a
serious failure, but it's perfectly normal on (even slightly) older
kernels:

---
$ git describe --contains ccce324dabfe # tcp: make the first N SYN RTO backoffs linear
v6.5-rc1~163^2~299
$ git describe --contains 1280c26228bd # tcp: add tcp_rto_max_ms sysctl
v6.15-rc1~160^2~352^2
---

On a 6.1 kernel, for example:

---
$ ./pasta -d --config-net
[...]
0.0258: Could not open /proc/sys/net/ipv4/tcp_syn_linear_timeouts: No such file or directory
0.0258: Could not open /proc/sys/net/ipv4/tcp_rto_max_ms: No such file or directory
0.0258: Read sysctl values syn_retries: 6, syn_linear_timeouts: 4, rto_max: 120
---

and actually the third message isn't accurate, because we didn't read
those values, we are just using them.

Worse, yet:

---
$ ./pasta
Could not open /proc/sys/net/ipv4/tcp_syn_linear_timeouts: No such file or directory
Could not open /proc/sys/net/ipv4/tcp_rto_max_ms: No such file or directory
---

so this would be logged *with default options* by Podman, rootlesskit /
Docker, via libvirt, and by other users too, meaning we would get
submerged by reports about this "error" if we release it like this (6.15
is fairly recent).

So maybe we could turn this (and the messages below) to debug() /
debug_perror(), and then:

> +		return -1;
> +	}
> +
> +	while (total_read < buf_size) {
> +		rc = read(fd, buf + total_read, buf_size - total_read);
> +
> +		if (rc < 0) {
> +			warn_perror("Couldn't read from %s", path);
> +			close(fd);
> +			return -1;
> +		}
> +
> +		if (rc == 0)
> +			break;
> +
> +		total_read += rc;
> +	}
> +
> +	close(fd);
> +
> +	if (total_read == buf_size) {
> +		warn("File %s contents exceed buffer size %zu", path,
> +		     buf_size);
> +		buf[buf_size - 1] = '\0';
> +		return -ENOBUFS;
> +	}
> +
> +	buf[total_read] = '\0';
> +
> +	return total_read;
> +}
> +
> +/**
> + * read_file_integer() - Read an integer value from a file
> + * @path:	Path to file to read
> + * @fallback:	Default value if file can't be read
> + *
> + * Return: integer value, @fallback on failure
> + */
> +intmax_t read_file_integer(const char *path, intmax_t fallback)
> +{
> +	ssize_t bytes_read;
> +	char buf[BUFSIZ];
> +	intmax_t value;
> +	char *end;
> +
> +	bytes_read = read_file(path, buf, sizeof(buf));
> +
> +	if (bytes_read < 0)
> +		return fallback;

...say something here, like "using %i as default value", so that it's
clear that the error doesn't have further consequences.

We should probably do that for all the failure cases here, so you might
want to 'goto error;' here, and also:

> +
> +	if (bytes_read == 0) {
> +		debug("Empty file %s", path);

here, and below, and then:

> +		return fallback;
> +	}
> +
> +	errno = 0;
> +	value = strtoimax(buf, &end, 10);
> +	if (*end && *end != '\n') {
> +		debug("Non-numeric content in %s", path);
> +		return fallback;
> +	}
> +	if (errno) {
> +		debug("Out of range value in %s: %s", path, buf);
> +		return fallback;
> +	}
> +
> +	return value;

error:
	debug("Using ...")
	return fallback;

> +}
> +
>  #ifdef __ia64__
>  /* Needed by do_clone() below: glibc doesn't export the prototype of __clone2(),
>   * use the description from clone(2).
> diff --git a/util.h b/util.h
> index a0b2ada..c1502cc 100644
> --- a/util.h
> +++ b/util.h
> @@ -229,6 +229,8 @@ void pidfile_write(int fd, pid_t pid);
>  int __daemon(int pidfile_fd, int devnull_fd);
>  int fls(unsigned long x);
>  int write_file(const char *path, const char *buf);
> +ssize_t read_file(const char *path, char *buf, size_t buf_size);
> +intmax_t read_file_integer(const char *path, intmax_t fallback);
>  int write_all_buf(int fd, const void *buf, size_t len);
>  int write_remainder(int fd, const struct iovec *iov, size_t iovcnt, size_t skip);
>  int read_all_buf(int fd, void *buf, size_t len);

-- 
Stefano


^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH v8 4/6] tcp: Resend SYN for inbound connections
  2025-11-10  9:31 ` [PATCH v8 4/6] tcp: Resend SYN for inbound connections Yumei Huang
  2025-11-10 10:46   ` David Gibson
@ 2025-11-18  0:19   ` Stefano Brivio
  1 sibling, 0 replies; 32+ messages in thread
From: Stefano Brivio @ 2025-11-18  0:19 UTC (permalink / raw)
  To: Yumei Huang; +Cc: passt-dev, david

On Mon, 10 Nov 2025 17:31:35 +0800
Yumei Huang <yuhuang@redhat.com> wrote:

> +/**
> + * tcp_get_rto_params() - Get host kernel RTO parameters
> + * @c:		Execution context
> + */
> +void tcp_get_rto_params(struct ctx *c)
> +{
> +	intmax_t v;
> +
> +	v = read_file_integer(SYN_RETRIES, SYN_RETRIES_DEFAULT);
> +	c->tcp.syn_retries = MIN(v, MAX_SYNCNT);
> +
> +	v = read_file_integer(SYN_LINEAR_TIMEOUTS, SYN_LINEAR_TIMEOUTS_DEFAULT);
> +	c->tcp.syn_linear_timeouts = MIN(v, MAX_SYNCNT);
> +
> +	debug("Read sysctl values syn_retries: %"PRIu8
> +	      ", syn_linear_timeouts: %"PRIu8,
> +	      c->tcp.syn_retries,
> +	      c->tcp.syn_linear_timeouts);

See my comment to 2/6: this should be "Using sysctl values ..." instead.

-- 
Stefano


^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH v8 2/6] util: Introduce read_file() and read_file_integer() function
  2025-11-17  1:10         ` Yumei Huang
@ 2025-11-18  0:19           ` Stefano Brivio
  0 siblings, 0 replies; 32+ messages in thread
From: Stefano Brivio @ 2025-11-18  0:19 UTC (permalink / raw)
  To: Yumei Huang; +Cc: passt-dev, david

On Mon, 17 Nov 2025 09:10:01 +0800
Yumei Huang <yuhuang@redhat.com> wrote:

> On Fri, Nov 14, 2025 at 6:12 PM Stefano Brivio <sbrivio@redhat.com> wrote:
> >
> > On Fri, 14 Nov 2025 09:58:57 +0800
> > Yumei Huang <yuhuang@redhat.com> wrote:
> >  
> > > On Fri, Nov 14, 2025 at 8:01 AM Stefano Brivio <sbrivio@redhat.com> wrote:  
> > > >
> > > > On Mon, 10 Nov 2025 17:31:33 +0800
> > > > Yumei Huang <yuhuang@redhat.com> wrote:
> > > >  
> > > > > Signed-off-by: Yumei Huang <yuhuang@redhat.com>
> > > > > Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
> > > > > ---
> > > > >  util.c | 86 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
> > > > >  util.h |  2 ++
> > > > >  2 files changed, 88 insertions(+)
> > > > >
> > > > > diff --git a/util.c b/util.c
> > > > > index 44c21a3..c4c849c 100644
> > > > > --- a/util.c
> > > > > +++ b/util.c
> > > > > @@ -590,6 +590,92 @@ int write_file(const char *path, const char *buf)
> > > > >       return len == 0 ? 0 : -1;
> > > > >  }
> > > > >
> > > > > +/**
> > > > > + * read_file() - Read contents of file into a NULL-terminated buffer
> > > > > + * @path:    Path to file to read
> > > > > + * @buf:     Buffer to store file contents
> > > > > + * @buf_size:        Size of buffer
> > > > > + *
> > > > > + * Return: number of bytes read on success, -1 on error, -ENOBUFS on truncation
> > > > > + */
> > > > > +ssize_t read_file(const char *path, char *buf, size_t buf_size)
> > > > > +{
> > > > > +     int fd = open(path, O_RDONLY | O_CLOEXEC);
> > > > > +     size_t total_read = 0;
> > > > > +     ssize_t rc;
> > > > > +
> > > > > +     if (fd < 0) {
> > > > > +             warn_perror("Could not open %s", path);
> > > > > +             return -1;
> > > > > +     }
> > > > > +
> > > > > +     while (total_read < buf_size) {
> > > > > +             rc = read(fd, buf + total_read, buf_size - total_read);  
> > > >
> > > > cppcheck rightfully says that:
> > > >
> > > > util.c:604:10: style: The scope of the variable 'rc' can be reduced. [variableScope]
> > > >  ssize_t rc;
> > > >          ^  
> > >
> > > Right.
> > > Seems it also says:
> > >
> > > tcp.c:2814:0: style: The function 'tcp_get_rto_params' should have
> > > static linkage since it is not used outside of its translation unit.
> > > [staticFunction]
> > > void tcp_get_rto_params(struct ctx *c)
> > > ^
> > > util.c:601:0: style: The function 'read_file' should have static
> > > linkage since it is not used outside of its translation unit.
> > > [staticFunction]
> > > ssize_t read_file(const char *path, char *buf, size_t buf_size)  
> >
> > Oops, my current version (2.16.0) doesn't say that, I should upgrade it
> > (but I typically try to remain a few versions behind as David usually
> > upgrades right away, so that we catch also differences).
> >  
> > > I understand read_file() may be called from other places in the
> > > future. But do we need to add static now? I guess we need it for
> > > tcp_get_rto_params().  
> >
> > On top of what David said, I'm not sure if we'll ever need it in
> > tcp_get_rto_params(): I guess we'll always want to read integers there.  
> 
> I meant we need to add static for tcp_get_rto_params().

Ah, yes, right, that one too, it's not used outside of tcp.c (its
compilation unit).

-- 
Stefano


^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH v8 2/6] util: Introduce read_file() and read_file_integer() function
  2025-11-18  0:19   ` Stefano Brivio
@ 2025-11-18  3:22     ` David Gibson
  2025-11-19  9:04       ` Yumei Huang
  0 siblings, 1 reply; 32+ messages in thread
From: David Gibson @ 2025-11-18  3:22 UTC (permalink / raw)
  To: Stefano Brivio; +Cc: Yumei Huang, passt-dev

[-- Attachment #1: Type: text/plain, Size: 6378 bytes --]

On Tue, Nov 18, 2025 at 01:19:38AM +0100, Stefano Brivio wrote:
> On Mon, 10 Nov 2025 17:31:33 +0800
> Yumei Huang <yuhuang@redhat.com> wrote:
> 
> > Signed-off-by: Yumei Huang <yuhuang@redhat.com>
> > Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
> > ---
> >  util.c | 86 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
> >  util.h |  2 ++
> >  2 files changed, 88 insertions(+)
> > 
> > diff --git a/util.c b/util.c
> > index 44c21a3..c4c849c 100644
> > --- a/util.c
> > +++ b/util.c
> > @@ -590,6 +590,92 @@ int write_file(const char *path, const char *buf)
> >  	return len == 0 ? 0 : -1;
> >  }
> >  
> > +/**
> > + * read_file() - Read contents of file into a NULL-terminated buffer
> > + * @path:	Path to file to read
> > + * @buf:	Buffer to store file contents
> > + * @buf_size:	Size of buffer
> > + *
> > + * Return: number of bytes read on success, -1 on error, -ENOBUFS on truncation
> > + */
> > +ssize_t read_file(const char *path, char *buf, size_t buf_size)
> > +{
> > +	int fd = open(path, O_RDONLY | O_CLOEXEC);
> > +	size_t total_read = 0;
> > +	ssize_t rc;
> > +
> > +	if (fd < 0) {
> > +		warn_perror("Could not open %s", path);
> 
> While testing something unrelated I realised that this looks like a
> serious failure, but it's perfectly normal on (even slightly) older
> kernels:
> 
> ---
> $ git describe --contains ccce324dabfe # tcp: make the first N SYN RTO backoffs linear
> v6.5-rc1~163^2~299
> $ git describe --contains 1280c26228bd # tcp: add tcp_rto_max_ms sysctl
> v6.15-rc1~160^2~352^2
> ---
> 
> On a 6.1 kernel, for example:
> 
> ---
> $ ./pasta -d --config-net
> [...]
> 0.0258: Could not open /proc/sys/net/ipv4/tcp_syn_linear_timeouts: No such file or directory
> 0.0258: Could not open /proc/sys/net/ipv4/tcp_rto_max_ms: No such file or directory
> 0.0258: Read sysctl values syn_retries: 6, syn_linear_timeouts: 4, rto_max: 120
> ---
> 
> and actually the third message isn't accurate, because we didn't read
> those values, we are just using them.

Right.  In fact I'd say here the fact they came from sysctl is less
important here than what we're using them for.  So maybe
	Using TCP RTO parameters, syn_retries: 6, ...

> 
> Worse, yet:
> 
> ---
> $ ./pasta
> Could not open /proc/sys/net/ipv4/tcp_syn_linear_timeouts: No such file or directory
> Could not open /proc/sys/net/ipv4/tcp_rto_max_ms: No such file or directory
> ---
> 
> so this would be logged *with default options* by Podman, rootlesskit /
> Docker, via libvirt, and by other users too, meaning we would get
> submerged by reports about this "error" if we release it like this (6.15
> is fairly recent).
> 
> So maybe we could turn this (and the messages below) to debug() /
> debug_perror(), and then:

I think we should remove the error message from read_file() entirely,
just returning an error code.  Essentially it is too low level to know
the severity and meaning of a failure, so reporting the problem to the
user is better left to the valler.

> > +		return -1;
> > +	}
> > +
> > +	while (total_read < buf_size) {
> > +		rc = read(fd, buf + total_read, buf_size - total_read);
> > +
> > +		if (rc < 0) {
> > +			warn_perror("Couldn't read from %s", path);
> > +			close(fd);
> > +			return -1;
> > +		}
> > +
> > +		if (rc == 0)
> > +			break;
> > +
> > +		total_read += rc;
> > +	}
> > +
> > +	close(fd);
> > +
> > +	if (total_read == buf_size) {
> > +		warn("File %s contents exceed buffer size %zu", path,
> > +		     buf_size);
> > +		buf[buf_size - 1] = '\0';
> > +		return -ENOBUFS;
> > +	}
> > +
> > +	buf[total_read] = '\0';
> > +
> > +	return total_read;
> > +}
> > +
> > +/**
> > + * read_file_integer() - Read an integer value from a file
> > + * @path:	Path to file to read
> > + * @fallback:	Default value if file can't be read
> > + *
> > + * Return: integer value, @fallback on failure
> > + */
> > +intmax_t read_file_integer(const char *path, intmax_t fallback)
> > +{
> > +	ssize_t bytes_read;
> > +	char buf[BUFSIZ];
> > +	intmax_t value;
> > +	char *end;
> > +
> > +	bytes_read = read_file(path, buf, sizeof(buf));
> > +
> > +	if (bytes_read < 0)
> > +		return fallback;
> 
> ...say something here, like "using %i as default value", so that it's
> clear that the error doesn't have further consequences.
> 
> We should probably do that for all the failure cases here, so you might
> want to 'goto error;' here, and also:

Arguably read_file_integer() is still too low-level to useful report
the error to the user.  But that would require a clunkier interface so
we can return an error code as well as the parsed value.

Saying it's falling back here would certainly be an improvement.

> > +
> > +	if (bytes_read == 0) {
> > +		debug("Empty file %s", path);
> 
> here, and below, and then:
> 
> > +		return fallback;
> > +	}
> > +
> > +	errno = 0;
> > +	value = strtoimax(buf, &end, 10);
> > +	if (*end && *end != '\n') {
> > +		debug("Non-numeric content in %s", path);
> > +		return fallback;
> > +	}
> > +	if (errno) {
> > +		debug("Out of range value in %s: %s", path, buf);
> > +		return fallback;
> > +	}
> > +
> > +	return value;
> 
> error:
> 	debug("Using ...")
> 	return fallback;
> 
> > +}
> > +
> >  #ifdef __ia64__
> >  /* Needed by do_clone() below: glibc doesn't export the prototype of __clone2(),
> >   * use the description from clone(2).
> > diff --git a/util.h b/util.h
> > index a0b2ada..c1502cc 100644
> > --- a/util.h
> > +++ b/util.h
> > @@ -229,6 +229,8 @@ void pidfile_write(int fd, pid_t pid);
> >  int __daemon(int pidfile_fd, int devnull_fd);
> >  int fls(unsigned long x);
> >  int write_file(const char *path, const char *buf);
> > +ssize_t read_file(const char *path, char *buf, size_t buf_size);
> > +intmax_t read_file_integer(const char *path, intmax_t fallback);
> >  int write_all_buf(int fd, const void *buf, size_t len);
> >  int write_remainder(int fd, const struct iovec *iov, size_t iovcnt, size_t skip);
> >  int read_all_buf(int fd, void *buf, size_t len);
> 
> -- 
> Stefano
> 

-- 
David Gibson (he or they)	| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you, not the other way
				| around.
http://www.ozlabs.org/~dgibson

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH v8 2/6] util: Introduce read_file() and read_file_integer() function
  2025-11-18  3:22     ` David Gibson
@ 2025-11-19  9:04       ` Yumei Huang
  2025-11-19  9:38         ` David Gibson
  0 siblings, 1 reply; 32+ messages in thread
From: Yumei Huang @ 2025-11-19  9:04 UTC (permalink / raw)
  To: David Gibson; +Cc: Stefano Brivio, passt-dev

On Tue, Nov 18, 2025 at 11:36 AM David Gibson
<david@gibson.dropbear.id.au> wrote:
>
> On Tue, Nov 18, 2025 at 01:19:38AM +0100, Stefano Brivio wrote:
> > On Mon, 10 Nov 2025 17:31:33 +0800
> > Yumei Huang <yuhuang@redhat.com> wrote:
> >
> > > Signed-off-by: Yumei Huang <yuhuang@redhat.com>
> > > Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
> > > ---
> > >  util.c | 86 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
> > >  util.h |  2 ++
> > >  2 files changed, 88 insertions(+)
> > >
> > > diff --git a/util.c b/util.c
> > > index 44c21a3..c4c849c 100644
> > > --- a/util.c
> > > +++ b/util.c
> > > @@ -590,6 +590,92 @@ int write_file(const char *path, const char *buf)
> > >     return len == 0 ? 0 : -1;
> > >  }
> > >
> > > +/**
> > > + * read_file() - Read contents of file into a NULL-terminated buffer
> > > + * @path:  Path to file to read
> > > + * @buf:   Buffer to store file contents
> > > + * @buf_size:      Size of buffer
> > > + *
> > > + * Return: number of bytes read on success, -1 on error, -ENOBUFS on truncation
> > > + */
> > > +ssize_t read_file(const char *path, char *buf, size_t buf_size)
> > > +{
> > > +   int fd = open(path, O_RDONLY | O_CLOEXEC);
> > > +   size_t total_read = 0;
> > > +   ssize_t rc;
> > > +
> > > +   if (fd < 0) {
> > > +           warn_perror("Could not open %s", path);
> >
> > While testing something unrelated I realised that this looks like a
> > serious failure, but it's perfectly normal on (even slightly) older
> > kernels:
> >
> > ---
> > $ git describe --contains ccce324dabfe # tcp: make the first N SYN RTO backoffs linear
> > v6.5-rc1~163^2~299
> > $ git describe --contains 1280c26228bd # tcp: add tcp_rto_max_ms sysctl
> > v6.15-rc1~160^2~352^2
> > ---
> >
> > On a 6.1 kernel, for example:
> >
> > ---
> > $ ./pasta -d --config-net
> > [...]
> > 0.0258: Could not open /proc/sys/net/ipv4/tcp_syn_linear_timeouts: No such file or directory
> > 0.0258: Could not open /proc/sys/net/ipv4/tcp_rto_max_ms: No such file or directory
> > 0.0258: Read sysctl values syn_retries: 6, syn_linear_timeouts: 4, rto_max: 120
> > ---
> >
> > and actually the third message isn't accurate, because we didn't read
> > those values, we are just using them.
>
> Right.  In fact I'd say here the fact they came from sysctl is less
> important here than what we're using them for.  So maybe
>         Using TCP RTO parameters, syn_retries: 6, ...

Got it.
>
> >
> > Worse, yet:
> >
> > ---
> > $ ./pasta
> > Could not open /proc/sys/net/ipv4/tcp_syn_linear_timeouts: No such file or directory
> > Could not open /proc/sys/net/ipv4/tcp_rto_max_ms: No such file or directory
> > ---
> >
> > so this would be logged *with default options* by Podman, rootlesskit /
> > Docker, via libvirt, and by other users too, meaning we would get
> > submerged by reports about this "error" if we release it like this (6.15
> > is fairly recent).
> >
> > So maybe we could turn this (and the messages below) to debug() /
> > debug_perror(), and then:
>
> I think we should remove the error message from read_file() entirely,
> just returning an error code.  Essentially it is too low level to know
> the severity and meaning of a failure, so reporting the problem to the
> user is better left to the valler.

Then we will have a few more values to return, like errno for open or
read file fails, -ENOBUFS, and another error code for when buf size is
0.  I'd say it's more complicated than write_fiel(). Maybe we could
update with debug()/debug_perror() first as Stefano suggested, and
then fix them together later?
>
> > > +           return -1;
> > > +   }
> > > +
> > > +   while (total_read < buf_size) {
> > > +           rc = read(fd, buf + total_read, buf_size - total_read);
> > > +
> > > +           if (rc < 0) {
> > > +                   warn_perror("Couldn't read from %s", path);
> > > +                   close(fd);
> > > +                   return -1;
> > > +           }
> > > +
> > > +           if (rc == 0)
> > > +                   break;
> > > +
> > > +           total_read += rc;
> > > +   }
> > > +
> > > +   close(fd);
> > > +
> > > +   if (total_read == buf_size) {
> > > +           warn("File %s contents exceed buffer size %zu", path,
> > > +                buf_size);
> > > +           buf[buf_size - 1] = '\0';
> > > +           return -ENOBUFS;
> > > +   }
> > > +
> > > +   buf[total_read] = '\0';
> > > +
> > > +   return total_read;
> > > +}
> > > +
> > > +/**
> > > + * read_file_integer() - Read an integer value from a file
> > > + * @path:  Path to file to read
> > > + * @fallback:      Default value if file can't be read
> > > + *
> > > + * Return: integer value, @fallback on failure
> > > + */
> > > +intmax_t read_file_integer(const char *path, intmax_t fallback)
> > > +{
> > > +   ssize_t bytes_read;
> > > +   char buf[BUFSIZ];
> > > +   intmax_t value;
> > > +   char *end;
> > > +
> > > +   bytes_read = read_file(path, buf, sizeof(buf));
> > > +
> > > +   if (bytes_read < 0)
> > > +           return fallback;
> >
> > ...say something here, like "using %i as default value", so that it's
> > clear that the error doesn't have further consequences.
> >
> > We should probably do that for all the failure cases here, so you might
> > want to 'goto error;' here, and also:
>
> Arguably read_file_integer() is still too low-level to useful report
> the error to the user.  But that would require a clunkier interface so
> we can return an error code as well as the parsed value.
>
> Saying it's falling back here would certainly be an improvement.

Sure, I can add the falling back message.
>
> > > +
> > > +   if (bytes_read == 0) {
> > > +           debug("Empty file %s", path);
> >
> > here, and below, and then:
> >
> > > +           return fallback;
> > > +   }
> > > +
> > > +   errno = 0;
> > > +   value = strtoimax(buf, &end, 10);
> > > +   if (*end && *end != '\n') {
> > > +           debug("Non-numeric content in %s", path);
> > > +           return fallback;
> > > +   }
> > > +   if (errno) {
> > > +           debug("Out of range value in %s: %s", path, buf);
> > > +           return fallback;
> > > +   }
> > > +
> > > +   return value;
> >
> > error:
> >       debug("Using ...")
> >       return fallback;
> >
> > > +}
> > > +
> > >  #ifdef __ia64__
> > >  /* Needed by do_clone() below: glibc doesn't export the prototype of __clone2(),
> > >   * use the description from clone(2).
> > > diff --git a/util.h b/util.h
> > > index a0b2ada..c1502cc 100644
> > > --- a/util.h
> > > +++ b/util.h
> > > @@ -229,6 +229,8 @@ void pidfile_write(int fd, pid_t pid);
> > >  int __daemon(int pidfile_fd, int devnull_fd);
> > >  int fls(unsigned long x);
> > >  int write_file(const char *path, const char *buf);
> > > +ssize_t read_file(const char *path, char *buf, size_t buf_size);
> > > +intmax_t read_file_integer(const char *path, intmax_t fallback);
> > >  int write_all_buf(int fd, const void *buf, size_t len);
> > >  int write_remainder(int fd, const struct iovec *iov, size_t iovcnt, size_t skip);
> > >  int read_all_buf(int fd, void *buf, size_t len);
> >
> > --
> > Stefano
> >
>
> --
> David Gibson (he or they)       | I'll have my music baroque, and my code
> david AT gibson.dropbear.id.au  | minimalist, thank you, not the other way
>                                 | around.
> http://www.ozlabs.org/~dgibson



-- 
Thanks,

Yumei Huang


^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH v8 2/6] util: Introduce read_file() and read_file_integer() function
  2025-11-19  9:04       ` Yumei Huang
@ 2025-11-19  9:38         ` David Gibson
  0 siblings, 0 replies; 32+ messages in thread
From: David Gibson @ 2025-11-19  9:38 UTC (permalink / raw)
  To: Yumei Huang; +Cc: Stefano Brivio, passt-dev

[-- Attachment #1: Type: text/plain, Size: 4301 bytes --]

On Wed, Nov 19, 2025 at 05:04:31PM +0800, Yumei Huang wrote:
> On Tue, Nov 18, 2025 at 11:36 AM David Gibson
> <david@gibson.dropbear.id.au> wrote:
> >
> > On Tue, Nov 18, 2025 at 01:19:38AM +0100, Stefano Brivio wrote:
> > > On Mon, 10 Nov 2025 17:31:33 +0800
> > > Yumei Huang <yuhuang@redhat.com> wrote:
> > >
> > > > Signed-off-by: Yumei Huang <yuhuang@redhat.com>
> > > > Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
> > > > ---
> > > >  util.c | 86 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
> > > >  util.h |  2 ++
> > > >  2 files changed, 88 insertions(+)
> > > >
> > > > diff --git a/util.c b/util.c
> > > > index 44c21a3..c4c849c 100644
> > > > --- a/util.c
> > > > +++ b/util.c
> > > > @@ -590,6 +590,92 @@ int write_file(const char *path, const char *buf)
> > > >     return len == 0 ? 0 : -1;
> > > >  }
> > > >
> > > > +/**
> > > > + * read_file() - Read contents of file into a NULL-terminated buffer
> > > > + * @path:  Path to file to read
> > > > + * @buf:   Buffer to store file contents
> > > > + * @buf_size:      Size of buffer
> > > > + *
> > > > + * Return: number of bytes read on success, -1 on error, -ENOBUFS on truncation
> > > > + */
> > > > +ssize_t read_file(const char *path, char *buf, size_t buf_size)
> > > > +{
> > > > +   int fd = open(path, O_RDONLY | O_CLOEXEC);
> > > > +   size_t total_read = 0;
> > > > +   ssize_t rc;
> > > > +
> > > > +   if (fd < 0) {
> > > > +           warn_perror("Could not open %s", path);
> > >
> > > While testing something unrelated I realised that this looks like a
> > > serious failure, but it's perfectly normal on (even slightly) older
> > > kernels:
> > >
> > > ---
> > > $ git describe --contains ccce324dabfe # tcp: make the first N SYN RTO backoffs linear
> > > v6.5-rc1~163^2~299
> > > $ git describe --contains 1280c26228bd # tcp: add tcp_rto_max_ms sysctl
> > > v6.15-rc1~160^2~352^2
> > > ---
> > >
> > > On a 6.1 kernel, for example:
> > >
> > > ---
> > > $ ./pasta -d --config-net
> > > [...]
> > > 0.0258: Could not open /proc/sys/net/ipv4/tcp_syn_linear_timeouts: No such file or directory
> > > 0.0258: Could not open /proc/sys/net/ipv4/tcp_rto_max_ms: No such file or directory
> > > 0.0258: Read sysctl values syn_retries: 6, syn_linear_timeouts: 4, rto_max: 120
> > > ---
> > >
> > > and actually the third message isn't accurate, because we didn't read
> > > those values, we are just using them.
> >
> > Right.  In fact I'd say here the fact they came from sysctl is less
> > important here than what we're using them for.  So maybe
> >         Using TCP RTO parameters, syn_retries: 6, ...
> 
> Got it.
> >
> > >
> > > Worse, yet:
> > >
> > > ---
> > > $ ./pasta
> > > Could not open /proc/sys/net/ipv4/tcp_syn_linear_timeouts: No such file or directory
> > > Could not open /proc/sys/net/ipv4/tcp_rto_max_ms: No such file or directory
> > > ---
> > >
> > > so this would be logged *with default options* by Podman, rootlesskit /
> > > Docker, via libvirt, and by other users too, meaning we would get
> > > submerged by reports about this "error" if we release it like this (6.15
> > > is fairly recent).
> > >
> > > So maybe we could turn this (and the messages below) to debug() /
> > > debug_perror(), and then:
> >
> > I think we should remove the error message from read_file() entirely,
> > just returning an error code.  Essentially it is too low level to know
> > the severity and meaning of a failure, so reporting the problem to the
> > user is better left to the valler.
> 
> Then we will have a few more values to return, like errno for open or
> read file fails, -ENOBUFS, and another error code for when buf size is
> 0.  I'd say it's more complicated than write_fiel(). Maybe we could
> update with debug()/debug_perror() first as Stefano suggested, and
> then fix them together later?

Right, returning full error details is somewhere between awkward and
impossible in C.  I think returning -errno for the first error to
occur is probably good enough, though.

-- 
David Gibson (he or they)	| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you, not the other way
				| around.
http://www.ozlabs.org/~dgibson

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply	[flat|nested] 32+ messages in thread

end of thread, other threads:[~2025-11-19  9:39 UTC | newest]

Thread overview: 32+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2025-11-10  9:31 [PATCH v8 0/6] Retry SYNs for inbound connections Yumei Huang
2025-11-10  9:31 ` [PATCH v8 1/6] tcp: Rename "retrans" to "retries" Yumei Huang
2025-11-10  9:31 ` [PATCH v8 2/6] util: Introduce read_file() and read_file_integer() function Yumei Huang
2025-11-14  0:01   ` Stefano Brivio
2025-11-14  1:58     ` Yumei Huang
2025-11-14  4:32       ` David Gibson
2025-11-14 10:12       ` Stefano Brivio
2025-11-17  1:10         ` Yumei Huang
2025-11-18  0:19           ` Stefano Brivio
2025-11-18  0:19   ` Stefano Brivio
2025-11-18  3:22     ` David Gibson
2025-11-19  9:04       ` Yumei Huang
2025-11-19  9:38         ` David Gibson
2025-11-10  9:31 ` [PATCH v8 3/6] tcp: Add parameter struct ctx *c to tcp_timer_ctl() Yumei Huang
2025-11-10 10:35   ` David Gibson
2025-11-14  0:01     ` Stefano Brivio
2025-11-14  0:36       ` David Gibson
2025-11-10  9:31 ` [PATCH v8 4/6] tcp: Resend SYN for inbound connections Yumei Huang
2025-11-10 10:46   ` David Gibson
2025-11-14  0:00     ` Stefano Brivio
2025-11-14  0:31       ` David Gibson
2025-11-18  0:19   ` Stefano Brivio
2025-11-10  9:31 ` [PATCH v8 5/6] tcp: Update data retransmission timeout Yumei Huang
2025-11-10  9:31 ` [PATCH v8 6/6] tcp: Clamp the retry timeout Yumei Huang
2025-11-10 10:56   ` David Gibson
2025-11-14  0:01     ` Stefano Brivio
2025-11-14  0:35       ` David Gibson
2025-11-14  3:05         ` Yumei Huang
2025-11-14  3:35           ` David Gibson
2025-11-17  2:38             ` Yumei Huang
2025-11-17  4:50               ` David Gibson
2025-11-18  0:19             ` Stefano Brivio

Code repositories for project(s) associated with this public inbox

	https://passt.top/passt

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for IMAP folder(s).