From mboxrd@z Thu Jan 1 00:00:00 1970 Authentication-Results: passt.top; dmarc=pass (p=quarantine dis=none) header.from=redhat.com Authentication-Results: passt.top; dkim=pass (1024-bit key; unprotected) header.d=redhat.com header.i=@redhat.com header.a=rsa-sha256 header.s=mimecast20190719 header.b=K8LBhz93; dkim-atps=neutral Received: from us-smtp-delivery-124.mimecast.com (us-smtp-delivery-124.mimecast.com [170.10.133.124]) by passt.top (Postfix) with ESMTPS id D4BB15A0619 for ; Tue, 02 Dec 2025 04:00:40 +0100 (CET) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=redhat.com; s=mimecast20190719; t=1764644439; h=from:from:reply-to:subject:subject:date:date:message-id:message-id: to:to:cc:cc:mime-version:mime-version:content-type:content-type: content-transfer-encoding:content-transfer-encoding: in-reply-to:in-reply-to:references:references; bh=yUkZJzl/IuJ98n/yk3H3OV8788ALpmDezWgLZQx6oi4=; b=K8LBhz93ou2BW8hj1xM5u81+2eA9cmZTTLfVVKSu66gFPn6+AdN4YwkE6uuPtSpvlx0y49 SJpDShlFyaNTtdeJiM3Jutg8kyrrKzRNT7vYPsmeBFvap4BQndH+A128BgzaT8gtH5+cLr 9HhehUJNkj4PfcQpeLSoqQTgABiAFZg= Received: from mx-prod-mc-03.mail-002.prod.us-west-2.aws.redhat.com (ec2-54-186-198-63.us-west-2.compute.amazonaws.com [54.186.198.63]) by relay.mimecast.com with ESMTP with STARTTLS (version=TLSv1.3, cipher=TLS_AES_256_GCM_SHA384) id us-mta-694-L_-2LXEhPvWS5Kr4nZv6UA-1; Mon, 01 Dec 2025 22:00:35 -0500 X-MC-Unique: L_-2LXEhPvWS5Kr4nZv6UA-1 X-Mimecast-MFC-AGG-ID: L_-2LXEhPvWS5Kr4nZv6UA_1764644434 Received: from mx-prod-int-05.mail-002.prod.us-west-2.aws.redhat.com (mx-prod-int-05.mail-002.prod.us-west-2.aws.redhat.com [10.30.177.17]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature RSA-PSS (2048 bits) server-digest SHA256) (No client certificate requested) by mx-prod-mc-03.mail-002.prod.us-west-2.aws.redhat.com (Postfix) with ESMTPS id 8C43819560B2; Tue, 2 Dec 2025 03:00:34 +0000 (UTC) Received: from fedora.redhat.com (unknown [10.72.116.175]) by 
mx-prod-int-05.mail-002.prod.us-west-2.aws.redhat.com (Postfix) with ESMTP id 1DF721955F1A; Tue, 2 Dec 2025 03:00:31 +0000 (UTC) From: Yumei Huang To: passt-dev@passt.top, sbrivio@redhat.com Subject: [PATCH v10 5/5] tcp: Clamp the retry timeout Date: Tue, 2 Dec 2025 11:00:07 +0800 Message-ID: <20251202030007.23581-6-yuhuang@redhat.com> In-Reply-To: <20251202030007.23581-1-yuhuang@redhat.com> References: <20251202030007.23581-1-yuhuang@redhat.com> MIME-Version: 1.0 X-Scanned-By: MIMEDefang 3.0 on 10.30.177.17 X-Mimecast-Spam-Score: 0 X-Mimecast-MFC-PROC-ID: nfE0snezuj6Hwz0adWE6Ww-Nv7J9lBpuXJdUvXivziI_1764644434 X-Mimecast-Originator: redhat.com Content-Transfer-Encoding: 8bit content-type: text/plain; charset="US-ASCII"; x-default=true Message-ID-Hash: LQ5BDXP6DIB3PISDM2SEYO7ZVWHMH5JT X-Message-ID-Hash: LQ5BDXP6DIB3PISDM2SEYO7ZVWHMH5JT X-MailFrom: yuhuang@redhat.com X-Mailman-Rule-Misses: dmarc-mitigation; no-senders; approved; emergency; loop; banned-address; member-moderation; nonmember-moderation; administrivia; implicit-dest; max-recipients; max-size; news-moderation; no-subject; digests; suspicious-header CC: david@gibson.dropbear.id.au, yuhuang@redhat.com X-Mailman-Version: 3.3.8 Precedence: list List-Id: Development discussion and patches for passt Archived-At: Archived-At: List-Archive: List-Archive: List-Help: List-Owner: List-Post: List-Subscribe: List-Unsubscribe: Clamp the TCP retry timeout to the maximum RTO read from tcp_rto_max_ms, as the Linux kernel does. If a retry occurs during the handshake and the RTO is below 3 seconds, re-initialise it to 3 seconds for data retransmissions, according to RFC 6298. 
Suggested-by: Stefano Brivio Signed-off-by: Yumei Huang --- tcp.c | 25 ++++++++++++++++++++----- tcp.h | 2 ++ tcp_conn.h | 1 + 3 files changed, 23 insertions(+), 5 deletions(-) diff --git a/tcp.c b/tcp.c index cff23d1..3418705 100644 --- a/tcp.c +++ b/tcp.c @@ -187,6 +187,9 @@ * established connections, or (syn_retries + syn_linear_timeouts) times * during the handshake, then reset the connection * + * - RTO_INIT_AFTER_SYN_RETRIES: if SYN retries happened during handshake and + * RTO is less than this, re-initialise RTO to this for data retransmissions + * * - FIN_TIMEOUT: if a FIN segment was sent to tap/guest (flag ACK_FROM_TAP_DUE * with TAP_FIN_SENT event), and no ACK is received within this time, reset * the connection @@ -340,6 +343,7 @@ enum { #define ACK_INTERVAL 10 /* ms */ #define RTO_INIT 1 /* s, RFC 6298 */ +#define RTO_INIT_AFTER_SYN_RETRIES 3 /* s, RFC 6298 */ #define FIN_TIMEOUT 60 #define ACT_TIMEOUT 7200 @@ -365,9 +369,11 @@ uint8_t tcp_migrate_rcv_queue [TCP_MIGRATE_RCV_QUEUE_MAX]; #define SYN_RETRIES "/proc/sys/net/ipv4/tcp_syn_retries" #define SYN_LINEAR_TIMEOUTS "/proc/sys/net/ipv4/tcp_syn_linear_timeouts" +#define RTO_MAX_MS "/proc/sys/net/ipv4/tcp_rto_max_ms" #define SYN_RETRIES_DEFAULT 6 #define SYN_LINEAR_TIMEOUTS_DEFAULT 4 +#define RTO_MAX_DEFAULT 120 /* s */ #define MAX_SYNCNT 127 /* derived from kernel's limit */ /* "Extended" data (not stored in the flow table) for TCP flow migration */ @@ -392,7 +398,7 @@ static const char *tcp_state_str[] __attribute((__unused__)) = { static const char *tcp_flag_str[] __attribute((__unused__)) = { "STALLED", "LOCAL", "ACTIVE_CLOSE", "ACK_TO_TAP_DUE", - "ACK_FROM_TAP_DUE", "ACK_FROM_TAP_BLOCKS", + "ACK_FROM_TAP_DUE", "ACK_FROM_TAP_BLOCKS", "SYN_RETRIED", }; /* Listening sockets, used for automatic port forwarding in pasta mode only */ @@ -590,10 +596,13 @@ static void tcp_timer_ctl(const struct ctx *c, struct tcp_tap_conn *conn) if (conn->flags & ACK_TO_TAP_DUE) { it.it_value.tv_nsec = 
(long)ACK_INTERVAL * 1000 * 1000; } else if (conn->flags & ACK_FROM_TAP_DUE) { - int exp = conn->retries; + int exp = conn->retries, timeout = RTO_INIT; if (!(conn->events & ESTABLISHED)) exp -= c->tcp.syn_linear_timeouts; - it.it_value.tv_sec = RTO_INIT << MAX(exp, 0); + else if (conn->flags & SYN_RETRIED) + timeout = MAX(timeout, RTO_INIT_AFTER_SYN_RETRIES); + timeout <<= MAX(exp, 0); + it.it_value.tv_sec = MIN(timeout, c->tcp.rto_max); } else if (CONN_HAS(conn, SOCK_FIN_SENT | TAP_FIN_ACKED)) { it.it_value.tv_sec = FIN_TIMEOUT; } else { @@ -2441,6 +2450,7 @@ void tcp_timer_handler(const struct ctx *c, union epoll_ref ref) flow_trace(conn, "SYN timeout, retry"); tcp_send_flag(c, conn, SYN); conn->retries++; + conn_flag(c, conn, SYN_RETRIED); tcp_timer_ctl(c, conn); } } else if (CONN_HAS(conn, SOCK_FIN_SENT | TAP_FIN_ACKED)) { @@ -2812,10 +2822,15 @@ static void tcp_get_rto_params(struct ctx *c) v = read_file_integer(SYN_LINEAR_TIMEOUTS, SYN_LINEAR_TIMEOUTS_DEFAULT); c->tcp.syn_linear_timeouts = MIN(v, MAX_SYNCNT); + v = read_file_integer(RTO_MAX_MS, (intmax_t)(RTO_MAX_DEFAULT * 1000)); + c->tcp.rto_max = MIN(DIV_ROUND_UP(v, 1000), INT_MAX); + debug("Using TCP RTO parameters, syn_retries: %"PRIu8 - ", syn_linear_timeouts: %"PRIu8, + ", syn_linear_timeouts: %"PRIu8 + ", rto_max: %d", c->tcp.syn_retries, - c->tcp.syn_linear_timeouts); + c->tcp.syn_linear_timeouts, + c->tcp.rto_max); } /** diff --git a/tcp.h b/tcp.h index 37d7758..6fb6f92 100644 --- a/tcp.h +++ b/tcp.h @@ -60,6 +60,7 @@ union tcp_listen_epoll_ref { * @fwd_out: Port forwarding configuration for outbound packets * @timer_run: Timestamp of most recent timer run * @pipe_size: Size of pipes for spliced connections + * @rto_max: Maximum retry timeout (in s) * @syn_retries: SYN retries using exponential backoff timeout * @syn_linear_timeouts: SYN retries before using exponential backoff timeout */ @@ -68,6 +69,7 @@ struct tcp_ctx { struct fwd_ports fwd_out; struct timespec timer_run; size_t pipe_size; + int 
rto_max; uint8_t syn_retries; uint8_t syn_linear_timeouts; }; diff --git a/tcp_conn.h b/tcp_conn.h index 923af36..e36910c 100644 --- a/tcp_conn.h +++ b/tcp_conn.h @@ -77,6 +77,7 @@ struct tcp_tap_conn { #define ACK_TO_TAP_DUE BIT(3) #define ACK_FROM_TAP_DUE BIT(4) #define ACK_FROM_TAP_BLOCKS BIT(5) +#define SYN_RETRIED BIT(6) #define SNDBUF_BITS 24 unsigned int sndbuf :SNDBUF_BITS; -- 2.51.1