public inbox for passt-dev@passt.top
 help / color / mirror / code / Atom feed
* [PATCH 0/2] tcp: Runtime checks for availability of TCP_INFO fields
@ 2024-10-23  0:42 David Gibson
  2024-10-23  0:42 ` [PATCH 1/2] tcp: Generalise probing for tcpi_snd_wnd field David Gibson
  2024-10-23  0:42 ` [PATCH 2/2] tcp: Use runtime tests for TCP_INFO fields David Gibson
  0 siblings, 2 replies; 5+ messages in thread
From: David Gibson @ 2024-10-23  0:42 UTC (permalink / raw)
  To: Stefano Brivio, passt-dev; +Cc: David Gibson

We have a number of Makefile tests for the presence of various fields
in tcp_info.  However that only tells us if they're present in the
kernel headers against which we're compiling, not whether they're
available in the kernel we're running on.  For anything outside the
earliest forms of TCP_INFO (as described in netinet/tcp.h and shared
with BSD), we should verify at runtime if the kernel is returning that
information.

This was supposed to be leading into improved handling of lazily
calling TCP_INFO, but I hit additional complications there, so I've
postponed it.  Might as well get these bits merged, though.

David Gibson (2):
  tcp: Generalise probing for tcpi_snd_wnd field
  tcp: Use runtime tests for TCP_INFO fields

 tcp.c | 97 +++++++++++++++++++++++++++++++++++------------------------
 1 file changed, 57 insertions(+), 40 deletions(-)

-- 
2.47.0


^ permalink raw reply	[flat|nested] 5+ messages in thread

* [PATCH 1/2] tcp: Generalise probing for tcpi_snd_wnd field
  2024-10-23  0:42 [PATCH 0/2] tcp: Runtime checks for availability of TCP_INFO fields David Gibson
@ 2024-10-23  0:42 ` David Gibson
  2024-10-23  0:42 ` [PATCH 2/2] tcp: Use runtime tests for TCP_INFO fields David Gibson
  1 sibling, 0 replies; 5+ messages in thread
From: David Gibson @ 2024-10-23  0:42 UTC (permalink / raw)
  To: Stefano Brivio, passt-dev; +Cc: David Gibson

In order to use the tcpi_snd_wnd field from the TCPI_INFO getsockopt() we
need both for the field to be present in the headers (HAS_SND_WND) and for
the runtime kernel to actually report it (snd_wnd_cap).

In fact we really need to check the same things for every tcp_info field
we want to use.  Prepare to do that, by generalising the probing for the
tcpi_snd_wnd field which set a single bool to instead record the size of
the returned tcp_info structure.  We can then use that recorded value to
check for the presence of any field we need.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
---
 tcp.c | 32 ++++++++++++++++++--------------
 1 file changed, 18 insertions(+), 14 deletions(-)

diff --git a/tcp.c b/tcp.c
index 0d22e07..3cca5c6 100644
--- a/tcp.c
+++ b/tcp.c
@@ -365,11 +365,18 @@ char		tcp_buf_discard		[MAX_WINDOW];
 
 /* Does the kernel support TCP_PEEK_OFF? */
 bool peek_offset_cap;
+/* Size of data returned by TCP_INFO getsockopt() */
+socklen_t tcp_info_size;
+
+#define tcp_info_cap(f_)						\
+	((offsetof(struct tcp_info, tcpi_##f_) +			\
+	  sizeof(((struct tcp_info *)NULL)->tcpi_##f_)) <= tcp_info_size)
+
 #ifdef HAS_SND_WND
 /* Does the kernel report sending window in TCP_INFO (kernel commit
  * 8f7baad7f035)
  */
-bool snd_wnd_cap;
+#define snd_wnd_cap	tcp_info_cap(snd_wnd)
 #else
 #define snd_wnd_cap	(false)
 #endif
@@ -2578,13 +2585,12 @@ static bool tcp_probe_peek_offset_cap(sa_family_t af)
 	return ret;
 }
 
-#ifdef HAS_SND_WND
 /**
- * tcp_probe_snd_wnd_cap() - Check if TCP_INFO reports tcpi_snd_wnd
+ * tcp_probe_tcp_info() - Check what data TCP_INFO reports
  *
- * Return: true if supported, false otherwise
+ * Return: Number of bytes returned by TCP_INFO getsockopt()
  */
-static bool tcp_probe_snd_wnd_cap(void)
+static socklen_t tcp_probe_tcp_info(void)
 {
 	struct tcp_info tinfo;
 	socklen_t sl = sizeof(tinfo);
@@ -2604,13 +2610,8 @@ static bool tcp_probe_snd_wnd_cap(void)
 
 	close(s);
 
-	if (sl < (offsetof(struct tcp_info, tcpi_snd_wnd) +
-		  sizeof(tinfo.tcpi_snd_wnd)))
-		return false;
-
-	return true;
+	return sl;
 }
-#endif /* HAS_SND_WND */
 
 /**
  * tcp_init() - Get initial sequence, hash secret, initialise per-socket data
@@ -2645,11 +2646,14 @@ int tcp_init(struct ctx *c)
 			  (!c->ifi6 || tcp_probe_peek_offset_cap(AF_INET6));
 	debug("SO_PEEK_OFF%ssupported", peek_offset_cap ? " " : " not ");
 
+	tcp_info_size = tcp_probe_tcp_info();
+
+#define dbg_tcpi(f_)	debug("TCP_INFO tcpi_%s field%s supported",	\
+			      STRINGIFY(f_), tcp_info_cap(f_) ? " " : " not ")
 #ifdef HAS_SND_WND
-	snd_wnd_cap = tcp_probe_snd_wnd_cap();
+	dbg_tcpi(snd_wnd);
 #endif
-	debug("TCP_INFO tcpi_snd_wnd field%ssupported",
-	      snd_wnd_cap ? " " : " not ");
+#undef dbg_tcpi
 
 	return 0;
 }
-- 
@@ -365,11 +365,18 @@ char		tcp_buf_discard		[MAX_WINDOW];
 
 /* Does the kernel support TCP_PEEK_OFF? */
 bool peek_offset_cap;
+/* Size of data returned by TCP_INFO getsockopt() */
+socklen_t tcp_info_size;
+
+#define tcp_info_cap(f_)						\
+	((offsetof(struct tcp_info, tcpi_##f_) +			\
+	  sizeof(((struct tcp_info *)NULL)->tcpi_##f_)) <= tcp_info_size)
+
 #ifdef HAS_SND_WND
 /* Does the kernel report sending window in TCP_INFO (kernel commit
  * 8f7baad7f035)
  */
-bool snd_wnd_cap;
+#define snd_wnd_cap	tcp_info_cap(snd_wnd)
 #else
 #define snd_wnd_cap	(false)
 #endif
@@ -2578,13 +2585,12 @@ static bool tcp_probe_peek_offset_cap(sa_family_t af)
 	return ret;
 }
 
-#ifdef HAS_SND_WND
 /**
- * tcp_probe_snd_wnd_cap() - Check if TCP_INFO reports tcpi_snd_wnd
+ * tcp_probe_tcp_info() - Check what data TCP_INFO reports
  *
- * Return: true if supported, false otherwise
+ * Return: Number of bytes returned by TCP_INFO getsockopt()
  */
-static bool tcp_probe_snd_wnd_cap(void)
+static socklen_t tcp_probe_tcp_info(void)
 {
 	struct tcp_info tinfo;
 	socklen_t sl = sizeof(tinfo);
@@ -2604,13 +2610,8 @@ static bool tcp_probe_snd_wnd_cap(void)
 
 	close(s);
 
-	if (sl < (offsetof(struct tcp_info, tcpi_snd_wnd) +
-		  sizeof(tinfo.tcpi_snd_wnd)))
-		return false;
-
-	return true;
+	return sl;
 }
-#endif /* HAS_SND_WND */
 
 /**
  * tcp_init() - Get initial sequence, hash secret, initialise per-socket data
@@ -2645,11 +2646,14 @@ int tcp_init(struct ctx *c)
 			  (!c->ifi6 || tcp_probe_peek_offset_cap(AF_INET6));
 	debug("SO_PEEK_OFF%ssupported", peek_offset_cap ? " " : " not ");
 
+	tcp_info_size = tcp_probe_tcp_info();
+
+#define dbg_tcpi(f_)	debug("TCP_INFO tcpi_%s field%s supported",	\
+			      STRINGIFY(f_), tcp_info_cap(f_) ? " " : " not ")
 #ifdef HAS_SND_WND
-	snd_wnd_cap = tcp_probe_snd_wnd_cap();
+	dbg_tcpi(snd_wnd);
 #endif
-	debug("TCP_INFO tcpi_snd_wnd field%ssupported",
-	      snd_wnd_cap ? " " : " not ");
+#undef dbg_tcpi
 
 	return 0;
 }
-- 
2.47.0


^ permalink raw reply related	[flat|nested] 5+ messages in thread

* [PATCH 2/2] tcp: Use runtime tests for TCP_INFO fields
  2024-10-23  0:42 [PATCH 0/2] tcp: Runtime checks for availability of TCP_INFO fields David Gibson
  2024-10-23  0:42 ` [PATCH 1/2] tcp: Generalise probing for tcpi_snd_wnd field David Gibson
@ 2024-10-23  0:42 ` David Gibson
  2024-10-23  7:41   ` Stefano Brivio
  1 sibling, 1 reply; 5+ messages in thread
From: David Gibson @ 2024-10-23  0:42 UTC (permalink / raw)
  To: Stefano Brivio, passt-dev; +Cc: David Gibson

In order to use particular fields from the TCP_INFO getsockopt() we
need them to be in the version of the structure we have defined.  We
test this in the Makefile, setting HAS_BYTES_ACKED and HAS_MIN_RTT
accordingly.

However, we also need the fields to be present in the runtime kernel
we're using, which we don't currently check for those fields.  Add
logic similar to that for tcpi_snd_wnd to check for these fields too
instead of just using the compile time check.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
---
 tcp.c | 65 +++++++++++++++++++++++++++++++++++------------------------
 1 file changed, 39 insertions(+), 26 deletions(-)

diff --git a/tcp.c b/tcp.c
index 3cca5c6..b5ef1f1 100644
--- a/tcp.c
+++ b/tcp.c
@@ -380,6 +380,19 @@ socklen_t tcp_info_size;
 #else
 #define snd_wnd_cap	(false)
 #endif
+#ifdef HAS_BYTES_ACKED
+/* Does the kernel report bytes acked in TCP_INFO (kernel commit 0df48c26d84) */
+#define bytes_acked_cap	tcp_info_cap(bytes_acked)
+#else
+#define bytes_acked_cap	(false)
+#endif
+#ifdef HAS_MIN_RTT
+/* Does the kernel report minimum RTT TCP_INFO (kernel commit cd9b266095f4) */
+#define min_rtt_cap	tcp_info_cap(min_rtt)
+#else
+#define min_rtt_cap	(false)
+#endif
+
 
 /* sendmsg() to socket */
 static struct iovec	tcp_iov			[UIO_MAXIOV];
@@ -687,11 +700,10 @@ static int tcp_rtt_dst_low(const struct tcp_tap_conn *conn)
 static void tcp_rtt_dst_check(const struct tcp_tap_conn *conn,
 			      const struct tcp_info *tinfo)
 {
-#ifdef HAS_MIN_RTT
 	const struct flowside *tapside = TAPFLOW(conn);
 	int i, hole = -1;
 
-	if (!tinfo->tcpi_min_rtt ||
+	if (!min_rtt_cap ||
 	    (int)tinfo->tcpi_min_rtt > LOW_RTT_THRESHOLD)
 		return;
 
@@ -712,10 +724,6 @@ static void tcp_rtt_dst_check(const struct tcp_tap_conn *conn,
 	if (hole == LOW_RTT_TABLE_SIZE)
 		hole = 0;
 	inany_from_af(low_rtt_dst + hole, AF_INET6, &in6addr_any);
-#else
-	(void)conn;
-	(void)tinfo;
-#endif /* HAS_MIN_RTT */
 }
 
 /**
@@ -1131,30 +1139,29 @@ int tcp_update_seqack_wnd(const struct ctx *c, struct tcp_tap_conn *conn,
 	uint32_t new_wnd_to_tap = prev_wnd_to_tap;
 	int s = conn->sock;
 
-#ifndef HAS_BYTES_ACKED
-	(void)force_seq;
-
-	conn->seq_ack_to_tap = conn->seq_from_tap;
-	if (SEQ_LT(conn->seq_ack_to_tap, prev_ack_to_tap))
-		conn->seq_ack_to_tap = prev_ack_to_tap;
-#else
-	if ((unsigned)SNDBUF_GET(conn) < SNDBUF_SMALL || tcp_rtt_dst_low(conn)
-	    || CONN_IS_CLOSING(conn) || (conn->flags & LOCAL) || force_seq) {
+	if (!bytes_acked_cap) {
 		conn->seq_ack_to_tap = conn->seq_from_tap;
-	} else if (conn->seq_ack_to_tap != conn->seq_from_tap) {
-		if (!tinfo) {
-			tinfo = &tinfo_new;
-			if (getsockopt(s, SOL_TCP, TCP_INFO, tinfo, &sl))
-				return 0;
-		}
-
-		conn->seq_ack_to_tap = tinfo->tcpi_bytes_acked +
-				       conn->seq_init_from_tap;
-
 		if (SEQ_LT(conn->seq_ack_to_tap, prev_ack_to_tap))
 			conn->seq_ack_to_tap = prev_ack_to_tap;
+	} else {
+		if ((unsigned)SNDBUF_GET(conn) < SNDBUF_SMALL ||
+		    tcp_rtt_dst_low(conn) || CONN_IS_CLOSING(conn) ||
+		    (conn->flags & LOCAL) || force_seq) {
+			conn->seq_ack_to_tap = conn->seq_from_tap;
+		} else if (conn->seq_ack_to_tap != conn->seq_from_tap) {
+			if (!tinfo) {
+				tinfo = &tinfo_new;
+				if (getsockopt(s, SOL_TCP, TCP_INFO, tinfo, &sl))
+					return 0;
+			}
+
+			conn->seq_ack_to_tap = tinfo->tcpi_bytes_acked +
+				conn->seq_init_from_tap;
+
+			if (SEQ_LT(conn->seq_ack_to_tap, prev_ack_to_tap))
+				conn->seq_ack_to_tap = prev_ack_to_tap;
+		}
 	}
-#endif /* !HAS_BYTES_ACKED */
 
 	if (!snd_wnd_cap) {
 		tcp_get_sndbuf(conn);
@@ -2653,6 +2660,12 @@ int tcp_init(struct ctx *c)
 #ifdef HAS_SND_WND
 	dbg_tcpi(snd_wnd);
 #endif
+#ifdef HAS_BYTES_ACKED
+	dbg_tcpi(bytes_acked);
+#endif
+#ifdef HAS_MIN_RTT
+	dbg_tcpi(min_rtt);
+#endif
 #undef dbg_tcpi
 
 	return 0;
-- 
@@ -380,6 +380,19 @@ socklen_t tcp_info_size;
 #else
 #define snd_wnd_cap	(false)
 #endif
+#ifdef HAS_BYTES_ACKED
+/* Does the kernel report bytes acked in TCP_INFO (kernel commit 0df48c26d84) */
+#define bytes_acked_cap	tcp_info_cap(bytes_acked)
+#else
+#define bytes_acked_cap	(false)
+#endif
+#ifdef HAS_MIN_RTT
+/* Does the kernel report minimum RTT TCP_INFO (kernel commit cd9b266095f4) */
+#define min_rtt_cap	tcp_info_cap(min_rtt)
+#else
+#define min_rtt_cap	(false)
+#endif
+
 
 /* sendmsg() to socket */
 static struct iovec	tcp_iov			[UIO_MAXIOV];
@@ -687,11 +700,10 @@ static int tcp_rtt_dst_low(const struct tcp_tap_conn *conn)
 static void tcp_rtt_dst_check(const struct tcp_tap_conn *conn,
 			      const struct tcp_info *tinfo)
 {
-#ifdef HAS_MIN_RTT
 	const struct flowside *tapside = TAPFLOW(conn);
 	int i, hole = -1;
 
-	if (!tinfo->tcpi_min_rtt ||
+	if (!min_rtt_cap ||
 	    (int)tinfo->tcpi_min_rtt > LOW_RTT_THRESHOLD)
 		return;
 
@@ -712,10 +724,6 @@ static void tcp_rtt_dst_check(const struct tcp_tap_conn *conn,
 	if (hole == LOW_RTT_TABLE_SIZE)
 		hole = 0;
 	inany_from_af(low_rtt_dst + hole, AF_INET6, &in6addr_any);
-#else
-	(void)conn;
-	(void)tinfo;
-#endif /* HAS_MIN_RTT */
 }
 
 /**
@@ -1131,30 +1139,29 @@ int tcp_update_seqack_wnd(const struct ctx *c, struct tcp_tap_conn *conn,
 	uint32_t new_wnd_to_tap = prev_wnd_to_tap;
 	int s = conn->sock;
 
-#ifndef HAS_BYTES_ACKED
-	(void)force_seq;
-
-	conn->seq_ack_to_tap = conn->seq_from_tap;
-	if (SEQ_LT(conn->seq_ack_to_tap, prev_ack_to_tap))
-		conn->seq_ack_to_tap = prev_ack_to_tap;
-#else
-	if ((unsigned)SNDBUF_GET(conn) < SNDBUF_SMALL || tcp_rtt_dst_low(conn)
-	    || CONN_IS_CLOSING(conn) || (conn->flags & LOCAL) || force_seq) {
+	if (!bytes_acked_cap) {
 		conn->seq_ack_to_tap = conn->seq_from_tap;
-	} else if (conn->seq_ack_to_tap != conn->seq_from_tap) {
-		if (!tinfo) {
-			tinfo = &tinfo_new;
-			if (getsockopt(s, SOL_TCP, TCP_INFO, tinfo, &sl))
-				return 0;
-		}
-
-		conn->seq_ack_to_tap = tinfo->tcpi_bytes_acked +
-				       conn->seq_init_from_tap;
-
 		if (SEQ_LT(conn->seq_ack_to_tap, prev_ack_to_tap))
 			conn->seq_ack_to_tap = prev_ack_to_tap;
+	} else {
+		if ((unsigned)SNDBUF_GET(conn) < SNDBUF_SMALL ||
+		    tcp_rtt_dst_low(conn) || CONN_IS_CLOSING(conn) ||
+		    (conn->flags & LOCAL) || force_seq) {
+			conn->seq_ack_to_tap = conn->seq_from_tap;
+		} else if (conn->seq_ack_to_tap != conn->seq_from_tap) {
+			if (!tinfo) {
+				tinfo = &tinfo_new;
+				if (getsockopt(s, SOL_TCP, TCP_INFO, tinfo, &sl))
+					return 0;
+			}
+
+			conn->seq_ack_to_tap = tinfo->tcpi_bytes_acked +
+				conn->seq_init_from_tap;
+
+			if (SEQ_LT(conn->seq_ack_to_tap, prev_ack_to_tap))
+				conn->seq_ack_to_tap = prev_ack_to_tap;
+		}
 	}
-#endif /* !HAS_BYTES_ACKED */
 
 	if (!snd_wnd_cap) {
 		tcp_get_sndbuf(conn);
@@ -2653,6 +2660,12 @@ int tcp_init(struct ctx *c)
 #ifdef HAS_SND_WND
 	dbg_tcpi(snd_wnd);
 #endif
+#ifdef HAS_BYTES_ACKED
+	dbg_tcpi(bytes_acked);
+#endif
+#ifdef HAS_MIN_RTT
+	dbg_tcpi(min_rtt);
+#endif
 #undef dbg_tcpi
 
 	return 0;
-- 
2.47.0


^ permalink raw reply related	[flat|nested] 5+ messages in thread

* Re: [PATCH 2/2] tcp: Use runtime tests for TCP_INFO fields
  2024-10-23  0:42 ` [PATCH 2/2] tcp: Use runtime tests for TCP_INFO fields David Gibson
@ 2024-10-23  7:41   ` Stefano Brivio
  2024-10-24  2:02     ` David Gibson
  0 siblings, 1 reply; 5+ messages in thread
From: Stefano Brivio @ 2024-10-23  7:41 UTC (permalink / raw)
  To: David Gibson; +Cc: passt-dev

On Wed, 23 Oct 2024 11:42:53 +1100
David Gibson <david@gibson.dropbear.id.au> wrote:

> In order to use particular fields from the TCP_INFO getsockopt() we
> need them to be in the version of the structure we have defined.  We
> test this in the Makefile, setting HAS_BYTES_ACKED and HAS_MIN_RTT
> accordingly.
> 
> However, we also need the fields to be present in the runtime kernel
> we're using, which we don't currently check for those fields.  Add
> logic similar to that for tcpi_snd_wnd to check for these fields too
> instead of just using the compile time check.
> 
> Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
> ---
>  tcp.c | 65 +++++++++++++++++++++++++++++++++++------------------------
>  1 file changed, 39 insertions(+), 26 deletions(-)
> 
> diff --git a/tcp.c b/tcp.c
> index 3cca5c6..b5ef1f1 100644
> --- a/tcp.c
> +++ b/tcp.c
> @@ -380,6 +380,19 @@ socklen_t tcp_info_size;
>  #else
>  #define snd_wnd_cap	(false)
>  #endif
> +#ifdef HAS_BYTES_ACKED
> +/* Does the kernel report bytes acked in TCP_INFO (kernel commit 0df48c26d84) */
> +#define bytes_acked_cap	tcp_info_cap(bytes_acked)
> +#else
> +#define bytes_acked_cap	(false)
> +#endif
> +#ifdef HAS_MIN_RTT
> +/* Does the kernel report minimum RTT TCP_INFO (kernel commit cd9b266095f4) */

Nit: "in TCP_INFO". This becomes too long, but perhaps:

/* Kernel reports minimum RTT in TCP_INFO (kernel commit cd9b266095f4) */

(and similar above) is just as informative.

> +#define min_rtt_cap	tcp_info_cap(min_rtt)
> +#else
> +#define min_rtt_cap	(false)
> +#endif
> +
>  
>  /* sendmsg() to socket */
>  static struct iovec	tcp_iov			[UIO_MAXIOV];
> @@ -687,11 +700,10 @@ static int tcp_rtt_dst_low(const struct tcp_tap_conn *conn)
>  static void tcp_rtt_dst_check(const struct tcp_tap_conn *conn,
>  			      const struct tcp_info *tinfo)
>  {
> -#ifdef HAS_MIN_RTT

I think we should keep those conditionals as well, because if
tcpi_min_rtt is not defined in the headers:

>  	const struct flowside *tapside = TAPFLOW(conn);
>  	int i, hole = -1;
>  
> -	if (!tinfo->tcpi_min_rtt ||
> +	if (!min_rtt_cap ||
>  	    (int)tinfo->tcpi_min_rtt > LOW_RTT_THRESHOLD)

...this won't build.

>  		return;
>  
> @@ -712,10 +724,6 @@ static void tcp_rtt_dst_check(const struct tcp_tap_conn *conn,
>  	if (hole == LOW_RTT_TABLE_SIZE)
>  		hole = 0;
>  	inany_from_af(low_rtt_dst + hole, AF_INET6, &in6addr_any);
> -#else
> -	(void)conn;
> -	(void)tinfo;
> -#endif /* HAS_MIN_RTT */
>  }
>  
>  /**
> @@ -1131,30 +1139,29 @@ int tcp_update_seqack_wnd(const struct ctx *c, struct tcp_tap_conn *conn,
>  	uint32_t new_wnd_to_tap = prev_wnd_to_tap;
>  	int s = conn->sock;
>  
> -#ifndef HAS_BYTES_ACKED

Same here:

> -	(void)force_seq;
> -
> -	conn->seq_ack_to_tap = conn->seq_from_tap;
> -	if (SEQ_LT(conn->seq_ack_to_tap, prev_ack_to_tap))
> -		conn->seq_ack_to_tap = prev_ack_to_tap;
> -#else
> -	if ((unsigned)SNDBUF_GET(conn) < SNDBUF_SMALL || tcp_rtt_dst_low(conn)
> -	    || CONN_IS_CLOSING(conn) || (conn->flags & LOCAL) || force_seq) {
> +	if (!bytes_acked_cap) {
>  		conn->seq_ack_to_tap = conn->seq_from_tap;
> -	} else if (conn->seq_ack_to_tap != conn->seq_from_tap) {
> -		if (!tinfo) {
> -			tinfo = &tinfo_new;
> -			if (getsockopt(s, SOL_TCP, TCP_INFO, tinfo, &sl))
> -				return 0;
> -		}
> -
> -		conn->seq_ack_to_tap = tinfo->tcpi_bytes_acked +
> -				       conn->seq_init_from_tap;
> -
>  		if (SEQ_LT(conn->seq_ack_to_tap, prev_ack_to_tap))
>  			conn->seq_ack_to_tap = prev_ack_to_tap;
> +	} else {
> +		if ((unsigned)SNDBUF_GET(conn) < SNDBUF_SMALL ||
> +		    tcp_rtt_dst_low(conn) || CONN_IS_CLOSING(conn) ||
> +		    (conn->flags & LOCAL) || force_seq) {
> +			conn->seq_ack_to_tap = conn->seq_from_tap;
> +		} else if (conn->seq_ack_to_tap != conn->seq_from_tap) {
> +			if (!tinfo) {
> +				tinfo = &tinfo_new;
> +				if (getsockopt(s, SOL_TCP, TCP_INFO, tinfo, &sl))
> +					return 0;
> +			}
> +
> +			conn->seq_ack_to_tap = tinfo->tcpi_bytes_acked +

...this won't build.

> +				conn->seq_init_from_tap;
> +
> +			if (SEQ_LT(conn->seq_ack_to_tap, prev_ack_to_tap))
> +				conn->seq_ack_to_tap = prev_ack_to_tap;
> +		}
>  	}
> -#endif /* !HAS_BYTES_ACKED */
>  
>  	if (!snd_wnd_cap) {
>  		tcp_get_sndbuf(conn);
> @@ -2653,6 +2660,12 @@ int tcp_init(struct ctx *c)
>  #ifdef HAS_SND_WND
>  	dbg_tcpi(snd_wnd);
>  #endif
> +#ifdef HAS_BYTES_ACKED
> +	dbg_tcpi(bytes_acked);
> +#endif
> +#ifdef HAS_MIN_RTT
> +	dbg_tcpi(min_rtt);
> +#endif
>  #undef dbg_tcpi
>  
>  	return 0;

Everything else looks good to me.

-- 
Stefano


^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH 2/2] tcp: Use runtime tests for TCP_INFO fields
  2024-10-23  7:41   ` Stefano Brivio
@ 2024-10-24  2:02     ` David Gibson
  0 siblings, 0 replies; 5+ messages in thread
From: David Gibson @ 2024-10-24  2:02 UTC (permalink / raw)
  To: Stefano Brivio; +Cc: passt-dev

[-- Attachment #1: Type: text/plain, Size: 5470 bytes --]

On Wed, Oct 23, 2024 at 09:41:39AM +0200, Stefano Brivio wrote:
> On Wed, 23 Oct 2024 11:42:53 +1100
> David Gibson <david@gibson.dropbear.id.au> wrote:
> 
> > In order to use particular fields from the TCP_INFO getsockopt() we
> > need them to be in the version of the structure we have defined.  We
> > test this in the Makefile, setting HAS_BYTES_ACKED and HAS_MIN_RTT
> > accordingly.
> > 
> > However, we also need the fields to be present in the runtime kernel
> > we're using, which we don't currently check for those fields.  Add
> > logic similar to that for tcpi_snd_wnd to check for these fields too
> > instead of just using the compile time check.
> > 
> > Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
> > ---
> >  tcp.c | 65 +++++++++++++++++++++++++++++++++++------------------------
> >  1 file changed, 39 insertions(+), 26 deletions(-)
> > 
> > diff --git a/tcp.c b/tcp.c
> > index 3cca5c6..b5ef1f1 100644
> > --- a/tcp.c
> > +++ b/tcp.c
> > @@ -380,6 +380,19 @@ socklen_t tcp_info_size;
> >  #else
> >  #define snd_wnd_cap	(false)
> >  #endif
> > +#ifdef HAS_BYTES_ACKED
> > +/* Does the kernel report bytes acked in TCP_INFO (kernel commit 0df48c26d84) */
> > +#define bytes_acked_cap	tcp_info_cap(bytes_acked)
> > +#else
> > +#define bytes_acked_cap	(false)
> > +#endif
> > +#ifdef HAS_MIN_RTT
> > +/* Does the kernel report minimum RTT TCP_INFO (kernel commit cd9b266095f4) */
> 
> Nit: "in TCP_INFO". This becomes too long, but perhaps:
> 
> /* Kernel reports minimum RTT in TCP_INFO (kernel commit cd9b266095f4) */
> 
> (and similar above) is just as informative.

Good idea, adjusted.

> 
> > +#define min_rtt_cap	tcp_info_cap(min_rtt)
> > +#else
> > +#define min_rtt_cap	(false)
> > +#endif
> > +
> >  
> >  /* sendmsg() to socket */
> >  static struct iovec	tcp_iov			[UIO_MAXIOV];
> > @@ -687,11 +700,10 @@ static int tcp_rtt_dst_low(const struct tcp_tap_conn *conn)
> >  static void tcp_rtt_dst_check(const struct tcp_tap_conn *conn,
> >  			      const struct tcp_info *tinfo)
> >  {
> > -#ifdef HAS_MIN_RTT
> 
> I think we should keep those conditionals as well, because if
> tcpi_min_rtt is not defined in the headers:
> 
> >  	const struct flowside *tapside = TAPFLOW(conn);
> >  	int i, hole = -1;
> >  
> > -	if (!tinfo->tcpi_min_rtt ||
> > +	if (!min_rtt_cap ||
> >  	    (int)tinfo->tcpi_min_rtt > LOW_RTT_THRESHOLD)
> 
> ...this won't build.

Ah, bother.  Earlier drafts had a field-getter macro which removed the
need for ifdefs outside its definition.  Then I removed it to simplify
things, but forgot that needed the ifdefs to come back.

> >  		return;
> >  
> > @@ -712,10 +724,6 @@ static void tcp_rtt_dst_check(const struct tcp_tap_conn *conn,
> >  	if (hole == LOW_RTT_TABLE_SIZE)
> >  		hole = 0;
> >  	inany_from_af(low_rtt_dst + hole, AF_INET6, &in6addr_any);
> > -#else
> > -	(void)conn;
> > -	(void)tinfo;
> > -#endif /* HAS_MIN_RTT */
> >  }
> >  
> >  /**
> > @@ -1131,30 +1139,29 @@ int tcp_update_seqack_wnd(const struct ctx *c, struct tcp_tap_conn *conn,
> >  	uint32_t new_wnd_to_tap = prev_wnd_to_tap;
> >  	int s = conn->sock;
> >  
> > -#ifndef HAS_BYTES_ACKED
> 
> Same here:
> 
> > -	(void)force_seq;
> > -
> > -	conn->seq_ack_to_tap = conn->seq_from_tap;
> > -	if (SEQ_LT(conn->seq_ack_to_tap, prev_ack_to_tap))
> > -		conn->seq_ack_to_tap = prev_ack_to_tap;
> > -#else
> > -	if ((unsigned)SNDBUF_GET(conn) < SNDBUF_SMALL || tcp_rtt_dst_low(conn)
> > -	    || CONN_IS_CLOSING(conn) || (conn->flags & LOCAL) || force_seq) {
> > +	if (!bytes_acked_cap) {
> >  		conn->seq_ack_to_tap = conn->seq_from_tap;
> > -	} else if (conn->seq_ack_to_tap != conn->seq_from_tap) {
> > -		if (!tinfo) {
> > -			tinfo = &tinfo_new;
> > -			if (getsockopt(s, SOL_TCP, TCP_INFO, tinfo, &sl))
> > -				return 0;
> > -		}
> > -
> > -		conn->seq_ack_to_tap = tinfo->tcpi_bytes_acked +
> > -				       conn->seq_init_from_tap;
> > -
> >  		if (SEQ_LT(conn->seq_ack_to_tap, prev_ack_to_tap))
> >  			conn->seq_ack_to_tap = prev_ack_to_tap;
> > +	} else {
> > +		if ((unsigned)SNDBUF_GET(conn) < SNDBUF_SMALL ||
> > +		    tcp_rtt_dst_low(conn) || CONN_IS_CLOSING(conn) ||
> > +		    (conn->flags & LOCAL) || force_seq) {
> > +			conn->seq_ack_to_tap = conn->seq_from_tap;
> > +		} else if (conn->seq_ack_to_tap != conn->seq_from_tap) {
> > +			if (!tinfo) {
> > +				tinfo = &tinfo_new;
> > +				if (getsockopt(s, SOL_TCP, TCP_INFO, tinfo, &sl))
> > +					return 0;
> > +			}
> > +
> > +			conn->seq_ack_to_tap = tinfo->tcpi_bytes_acked +
> 
> ...this won't build.
> 
> > +				conn->seq_init_from_tap;
> > +
> > +			if (SEQ_LT(conn->seq_ack_to_tap, prev_ack_to_tap))
> > +				conn->seq_ack_to_tap = prev_ack_to_tap;
> > +		}
> >  	}
> > -#endif /* !HAS_BYTES_ACKED */
> >  
> >  	if (!snd_wnd_cap) {
> >  		tcp_get_sndbuf(conn);
> > @@ -2653,6 +2660,12 @@ int tcp_init(struct ctx *c)
> >  #ifdef HAS_SND_WND
> >  	dbg_tcpi(snd_wnd);
> >  #endif
> > +#ifdef HAS_BYTES_ACKED
> > +	dbg_tcpi(bytes_acked);
> > +#endif
> > +#ifdef HAS_MIN_RTT
> > +	dbg_tcpi(min_rtt);
> > +#endif
> >  #undef dbg_tcpi
> >  
> >  	return 0;
> 
> Everything else looks good to me.
> 

-- 
David Gibson (he or they)	| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you, not the other way
				| around.
http://www.ozlabs.org/~dgibson

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2024-10-24  3:38 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2024-10-23  0:42 [PATCH 0/2] tcp: Runtime checks for availability of TCP_INFO fields David Gibson
2024-10-23  0:42 ` [PATCH 1/2] tcp: Generalise probing for tcpi_snd_wnd field David Gibson
2024-10-23  0:42 ` [PATCH 2/2] tcp: Use runtime tests for TCP_INFO fields David Gibson
2024-10-23  7:41   ` Stefano Brivio
2024-10-24  2:02     ` David Gibson

Code repositories for project(s) associated with this public inbox

	https://passt.top/passt

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for IMAP folder(s).