On Tue, Nov 25, 2025 at 03:26:38PM +0800, Yumei Huang wrote: > Clamp the TCP retry timeout as Linux kernel does. If a retry occurs > during the handshake and the RTO is below 3 seconds, re-initialise > it to 3 seconds for data retransmissions according to RFC 6298. > > Suggested-by: Stefano Brivio > Signed-off-by: Yumei Huang Reviewed-by: David Gibson > --- > tcp.c | 25 ++++++++++++++++++++----- > tcp.h | 2 ++ > tcp_conn.h | 1 + > 3 files changed, 23 insertions(+), 5 deletions(-) > > diff --git a/tcp.c b/tcp.c > index b3aa064..887b32f 100644 > --- a/tcp.c > +++ b/tcp.c > @@ -187,6 +187,9 @@ > * for established connections, or (syn_retries + syn_linear_timeouts) times > * during the handshake, reset the connection > * > + * - RTO_INIT_AFTER_SYN_RETRIES: if SYN retries happened during handshake and > + * RTO is less than this, re-initialise RTO to this for data retransmissions > + * > * - FIN_TIMEOUT: if a FIN segment was sent to tap/guest (flag ACK_FROM_TAP_DUE > * with TAP_FIN_SENT event), and no ACK is received within this time, reset > * the connection > @@ -340,6 +343,7 @@ enum { > > #define ACK_INTERVAL 10 /* ms */ > #define RTO_INIT 1 /* s, RFC 6298 */ > +#define RTO_INIT_AFTER_SYN_RETRIES 3 /* s, RFC 6298 */ > #define FIN_TIMEOUT 60 > #define ACT_TIMEOUT 7200 > > @@ -365,9 +369,11 @@ uint8_t tcp_migrate_rcv_queue [TCP_MIGRATE_RCV_QUEUE_MAX]; > > #define SYN_RETRIES "/proc/sys/net/ipv4/tcp_syn_retries" > #define SYN_LINEAR_TIMEOUTS "/proc/sys/net/ipv4/tcp_syn_linear_timeouts" > +#define RTO_MAX_MS "/proc/sys/net/ipv4/tcp_rto_max_ms" > > #define SYN_RETRIES_DEFAULT 6 > #define SYN_LINEAR_TIMEOUTS_DEFAULT 4 > +#define RTO_MAX_MS_DEFAULT 120000 > #define MAX_SYNCNT 127 /* derived from kernel's limit */ > > /* "Extended" data (not stored in the flow table) for TCP flow migration */ > @@ -392,7 +398,7 @@ static const char *tcp_state_str[] __attribute((__unused__)) = { > > static const char *tcp_flag_str[] __attribute((__unused__)) = { > "STALLED", "LOCAL", "ACTIVE_CLOSE", "ACK_TO_TAP_DUE", > - "ACK_FROM_TAP_DUE", "ACK_FROM_TAP_BLOCKS", > + "ACK_FROM_TAP_DUE", "ACK_FROM_TAP_BLOCKS", "SYN_RETRIED", > }; > > /* Listening sockets, used for automatic port forwarding in pasta mode only */ > @@ -590,10 +596,13 @@ static void tcp_timer_ctl(const struct ctx *c, struct tcp_tap_conn *conn) > if (conn->flags & ACK_TO_TAP_DUE) { > it.it_value.tv_nsec = (long)ACK_INTERVAL * 1000 * 1000; > } else if (conn->flags & ACK_FROM_TAP_DUE) { > - int exp = conn->retries; > + int exp = conn->retries, timeout = RTO_INIT; > if (!(conn->events & ESTABLISHED)) > exp -= c->tcp.syn_linear_timeouts; > - it.it_value.tv_sec = RTO_INIT << MAX(exp, 0); > + else if (conn->flags & SYN_RETRIED) > + timeout = MAX(timeout, RTO_INIT_AFTER_SYN_RETRIES); > + timeout <<= MAX(exp, 0); > + it.it_value.tv_sec = MIN(timeout, c->tcp.rto_max); > } else if (CONN_HAS(conn, SOCK_FIN_SENT | TAP_FIN_ACKED)) { > it.it_value.tv_sec = FIN_TIMEOUT; > } else { > @@ -2441,6 +2450,7 @@ void tcp_timer_handler(const struct ctx *c, union epoll_ref ref) > flow_trace(conn, "SYN timeout, retry"); > tcp_send_flag(c, conn, SYN); > conn->retries++; > + conn_flag(c, conn, SYN_RETRIED); > tcp_timer_ctl(c, conn); > } > } else if (CONN_HAS(conn, SOCK_FIN_SENT | TAP_FIN_ACKED)) { > @@ -2812,10 +2822,15 @@ static void tcp_get_rto_params(struct ctx *c) > v = read_file_integer(SYN_LINEAR_TIMEOUTS, SYN_LINEAR_TIMEOUTS_DEFAULT); > c->tcp.syn_linear_timeouts = MIN(v, MAX_SYNCNT); > > + v = read_file_integer(RTO_MAX_MS, RTO_MAX_MS_DEFAULT); > + c->tcp.rto_max = MIN(DIV_ROUND_UP(v, 1000), INT_MAX); > + > debug("Using TCP RTO parameters, syn_retries: %"PRIu8 > - ", syn_linear_timeouts: %"PRIu8, > + ", syn_linear_timeouts: %"PRIu8 > + ", rto_max: %d", > c->tcp.syn_retries, > - c->tcp.syn_linear_timeouts); > + c->tcp.syn_linear_timeouts, > + c->tcp.rto_max); > } > > /** > diff --git a/tcp.h b/tcp.h > index 37d7758..6fb6f92 100644 > --- a/tcp.h > +++ b/tcp.h > @@ -60,6 +60,7 @@ union tcp_listen_epoll_ref { > * @fwd_out: Port forwarding configuration for outbound packets > * @timer_run: Timestamp of most recent timer run > * @pipe_size: Size of pipes for spliced connections > + * @rto_max: Maximum retry timeout (in s) > * @syn_retries: SYN retries using exponential backoff timeout > * @syn_linear_timeouts: SYN retries before using exponential backoff timeout > */ > @@ -68,6 +69,7 @@ struct tcp_ctx { > struct fwd_ports fwd_out; > struct timespec timer_run; > size_t pipe_size; > + int rto_max; > uint8_t syn_retries; > uint8_t syn_linear_timeouts; > }; > diff --git a/tcp_conn.h b/tcp_conn.h > index 923af36..e36910c 100644 > --- a/tcp_conn.h > +++ b/tcp_conn.h > @@ -77,6 +77,7 @@ struct tcp_tap_conn { > #define ACK_TO_TAP_DUE BIT(3) > #define ACK_FROM_TAP_DUE BIT(4) > #define ACK_FROM_TAP_BLOCKS BIT(5) > +#define SYN_RETRIED BIT(6) > > #define SNDBUF_BITS 24 > unsigned int sndbuf :SNDBUF_BITS; > -- > 2.51.1 > -- David Gibson (he or they) | I'll have my music baroque, and my code david AT gibson.dropbear.id.au | minimalist, thank you, not the other way | around. http://www.ozlabs.org/~dgibson