public inbox for passt-dev@passt.top
 help / color / mirror / code / Atom feed
From: Stefano Brivio <sbrivio@redhat.com>
To: passt-dev@passt.top
Subject: [PATCH 07/22] tcp, netlink, HAS{BYTES_ACKED,MIN_RTT,GETRANDOM} and NETLINK_GET_STRICT_CHK
Date: Fri, 28 Jan 2022 19:33:42 +0100	[thread overview]
Message-ID: <20220128183357.3407606-8-sbrivio@redhat.com> (raw)
In-Reply-To: <20220128183357.3407606-1-sbrivio@redhat.com>

[-- Attachment #1: Type: text/plain, Size: 4895 bytes --]

tcpi_bytes_acked and tcpi_min_rtt are only available on recent
kernel versions: provide fall-back paths (incurring some degree of
performance penalty).

Support for getrandom() was introduced in Linux 3.17 and glibc 2.25:
provide an alternate mechanism for that as well, reading from
/dev/random.

Also check if NETLINK_GET_STRICT_CHK is defined before using it:
it's not strictly needed, as we'll filter out irrelevant results
from netlink anyway.

Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
---
 Makefile  | 15 +++++++++++++++
 netlink.c |  9 +++++++--
 tcp.c     | 36 ++++++++++++++++++++++++++++++++++++
 3 files changed, 58 insertions(+), 2 deletions(-)

diff --git a/Makefile b/Makefile
index 4647210..443c39d 100644
--- a/Makefile
+++ b/Makefile
@@ -45,6 +45,21 @@ ifeq ($(shell printf "$(C)" | $(CC) -S -xc - -o - >/dev/null 2>&1; echo $$?),0)
 	CFLAGS += -DHAS_SND_WND
 endif
 
+C := \#include <linux/tcp.h>\nstruct tcp_info x = { .tcpi_bytes_acked = 0 };
+ifeq ($(shell printf "$(C)" | $(CC) -S -xc - -o - >/dev/null 2>&1; echo $$?),0)
+	CFLAGS += -DHAS_BYTES_ACKED
+endif
+
+C := \#include <linux/tcp.h>\nstruct tcp_info x = { .tcpi_min_rtt = 0 };
+ifeq ($(shell printf "$(C)" | $(CC) -S -xc - -o - >/dev/null 2>&1; echo $$?),0)
+	CFLAGS += -DHAS_MIN_RTT
+endif
+
+C := \#include <sys/random.h>\nint main(){int a=getrandom(0, 0, 0);}
+ifeq ($(shell printf "$(C)" | $(CC) -S -xc - -o - >/dev/null 2>&1; echo $$?),0)
+	CFLAGS += -DHAS_GETRANDOM
+endif
+
 prefix ?= /usr/local
 
 all: passt pasta passt4netns qrap
diff --git a/netlink.c b/netlink.c
index 0948f45..3ba5f05 100644
--- a/netlink.c
+++ b/netlink.c
@@ -46,7 +46,10 @@ static int nl_seq;
 static int nl_sock_init_do(void *arg)
 {
 	struct sockaddr_nl addr = { .nl_family = AF_NETLINK, };
-	int *s = &nl_sock, v = 1;
+	int *s = &nl_sock;
+#ifdef NETLINK_GET_STRICT_CHK
+	int y = 1;
+#endif
 
 ns:
 	if (((*s) = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE)) < 0 ||
@@ -56,7 +59,9 @@ ns:
 	if (*s == -1 || !arg || s == &nl_sock_ns)
 		return 0;
 
-	setsockopt(*s, SOL_NETLINK, NETLINK_GET_STRICT_CHK, &v, sizeof(v));
+#ifdef NETLINK_GET_STRICT_CHK
+	setsockopt(*s, SOL_NETLINK, NETLINK_GET_STRICT_CHK, &y, sizeof(y));
+#endif
 
 	ns_enter((struct ctx *)arg);
 	s = &nl_sock_ns;
diff --git a/tcp.c b/tcp.c
index 96d462f..839bf30 100644
--- a/tcp.c
+++ b/tcp.c
@@ -321,7 +321,9 @@
 #include <stddef.h>
 #include <string.h>
 #include <sys/epoll.h>
+#ifdef HAS_GETRANDOM
 #include <sys/random.h>
+#endif
 #include <sys/socket.h>
 #include <sys/types.h>
 #include <sys/uio.h>
@@ -760,6 +762,7 @@ static int tcp_rtt_dst_low(struct tcp_tap_conn *conn)
  */
 static void tcp_rtt_dst_check(struct tcp_tap_conn *conn, struct tcp_info *tinfo)
 {
+#ifdef HAS_MIN_RTT
 	int i, hole = -1;
 
 	if (!tinfo->tcpi_min_rtt ||
@@ -777,6 +780,10 @@ static void tcp_rtt_dst_check(struct tcp_tap_conn *conn, struct tcp_info *tinfo)
 	if (hole == LOW_RTT_TABLE_SIZE)
 		hole = 0;
 	memcpy(low_rtt_dst + hole, &in6addr_any, sizeof(conn->a.a6));
+#else
+	(void)conn;
+	(void)tinfo;
+#endif /* HAS_MIN_RTT */
 }
 
 /**
@@ -1552,6 +1559,13 @@ static int tcp_update_seqack_wnd(struct ctx *c, struct tcp_tap_conn *conn,
 	struct tcp_info tinfo_new;
 	int s = conn->sock;
 
+#ifndef HAS_BYTES_ACKED
+	(void)flags;
+
+	conn->seq_ack_to_tap = conn->seq_from_tap;
+	if (SEQ_LT(conn->seq_ack_to_tap, prev_ack_to_tap))
+		conn->seq_ack_to_tap = prev_ack_to_tap;
+#else
 	if (conn->state > ESTABLISHED || (flags & (DUP_ACK | FORCE_ACK)) ||
 	    conn->local || tcp_rtt_dst_low(conn) ||
 	    conn->snd_buf < SNDBUF_SMALL) {
@@ -1569,6 +1583,7 @@ static int tcp_update_seqack_wnd(struct ctx *c, struct tcp_tap_conn *conn,
 		if (SEQ_LT(conn->seq_ack_to_tap, prev_ack_to_tap))
 			conn->seq_ack_to_tap = prev_ack_to_tap;
 	}
+#endif /* !HAS_BYTES_ACKED */
 
 	if (!KERNEL_REPORTS_SND_WND(c)) {
 		tcp_get_sndbuf(conn);
@@ -3586,9 +3601,30 @@ int tcp_sock_init(struct ctx *c, struct timespec *now)
 {
 	struct tcp_sock_refill_arg refill_arg = { c, 0 };
 	int i, port;
+#ifndef HAS_GETRANDOM
+	int dev_random = open("/dev/random", O_RDONLY);
+	unsigned int random_read = 0;
+
+	while (dev_random && random_read < sizeof(c->tcp.hash_secret)) {
+		int ret = read(dev_random,
+			       (uint8_t *)&c->tcp.hash_secret + random_read,
+			       sizeof(c->tcp.hash_secret) - random_read);
 
+		if (ret == -1 && errno == EINTR)
+			continue;
+
+		if (ret <= 0)
+			break;
+
+		random_read += ret;
+	}
+	if (dev_random >= 0)
+		close(dev_random);
+	if (random_read < sizeof(c->tcp.hash_secret)) {
+#else
 	if (getrandom(&c->tcp.hash_secret, sizeof(c->tcp.hash_secret),
 		      GRND_RANDOM) < 0) {
+#endif /* !HAS_GETRANDOM */
 		perror("TCP initial sequence getrandom");
 		exit(EXIT_FAILURE);
 	}
-- 
@@ -321,7 +321,9 @@
 #include <stddef.h>
 #include <string.h>
 #include <sys/epoll.h>
+#ifdef HAS_GETRANDOM
 #include <sys/random.h>
+#endif
 #include <sys/socket.h>
 #include <sys/types.h>
 #include <sys/uio.h>
@@ -760,6 +762,7 @@ static int tcp_rtt_dst_low(struct tcp_tap_conn *conn)
  */
 static void tcp_rtt_dst_check(struct tcp_tap_conn *conn, struct tcp_info *tinfo)
 {
+#ifdef HAS_MIN_RTT
 	int i, hole = -1;
 
 	if (!tinfo->tcpi_min_rtt ||
@@ -777,6 +780,10 @@ static void tcp_rtt_dst_check(struct tcp_tap_conn *conn, struct tcp_info *tinfo)
 	if (hole == LOW_RTT_TABLE_SIZE)
 		hole = 0;
 	memcpy(low_rtt_dst + hole, &in6addr_any, sizeof(conn->a.a6));
+#else
+	(void)conn;
+	(void)tinfo;
+#endif /* HAS_MIN_RTT */
 }
 
 /**
@@ -1552,6 +1559,13 @@ static int tcp_update_seqack_wnd(struct ctx *c, struct tcp_tap_conn *conn,
 	struct tcp_info tinfo_new;
 	int s = conn->sock;
 
+#ifndef HAS_BYTES_ACKED
+	(void)flags;
+
+	conn->seq_ack_to_tap = conn->seq_from_tap;
+	if (SEQ_LT(conn->seq_ack_to_tap, prev_ack_to_tap))
+		conn->seq_ack_to_tap = prev_ack_to_tap;
+#else
 	if (conn->state > ESTABLISHED || (flags & (DUP_ACK | FORCE_ACK)) ||
 	    conn->local || tcp_rtt_dst_low(conn) ||
 	    conn->snd_buf < SNDBUF_SMALL) {
@@ -1569,6 +1583,7 @@ static int tcp_update_seqack_wnd(struct ctx *c, struct tcp_tap_conn *conn,
 		if (SEQ_LT(conn->seq_ack_to_tap, prev_ack_to_tap))
 			conn->seq_ack_to_tap = prev_ack_to_tap;
 	}
+#endif /* !HAS_BYTES_ACKED */
 
 	if (!KERNEL_REPORTS_SND_WND(c)) {
 		tcp_get_sndbuf(conn);
@@ -3586,9 +3601,30 @@ int tcp_sock_init(struct ctx *c, struct timespec *now)
 {
 	struct tcp_sock_refill_arg refill_arg = { c, 0 };
 	int i, port;
+#ifndef HAS_GETRANDOM
+	int dev_random = open("/dev/random", O_RDONLY);
+	unsigned int random_read = 0;
+
+	while (dev_random && random_read < sizeof(c->tcp.hash_secret)) {
+		int ret = read(dev_random,
+			       (uint8_t *)&c->tcp.hash_secret + random_read,
+			       sizeof(c->tcp.hash_secret) - random_read);
 
+		if (ret == -1 && errno == EINTR)
+			continue;
+
+		if (ret <= 0)
+			break;
+
+		random_read += ret;
+	}
+	if (dev_random >= 0)
+		close(dev_random);
+	if (random_read < sizeof(c->tcp.hash_secret)) {
+#else
 	if (getrandom(&c->tcp.hash_secret, sizeof(c->tcp.hash_secret),
 		      GRND_RANDOM) < 0) {
+#endif /* !HAS_GETRANDOM */
 		perror("TCP initial sequence getrandom");
 		exit(EXIT_FAILURE);
 	}
-- 
2.33.0


  parent reply	other threads:[~2022-01-28 18:33 UTC|newest]

Thread overview: 23+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-01-28 18:33 [PATCH 00/22] Fixes for non-x86_64, older kernels/glibc, and some more Stefano Brivio
2022-01-28 18:33 ` [PATCH 01/22] tcp: Cover all usages of tcpi_snd_wnd with HAS_SND_WND Stefano Brivio
2022-01-28 18:33 ` [PATCH 02/22] tap, tcp: Fix two comparisons with different signedness reported by gcc 7 Stefano Brivio
2022-01-28 18:33 ` [PATCH 03/22] passt: Drop <linux/ipv6.h> include, carry own ipv6hdr and opt_hdr definitions Stefano Brivio
2022-01-28 18:33 ` [PATCH 04/22] Makefile, seccomp: Fix build for i386, ppc64, ppc64le Stefano Brivio
2022-01-28 18:33 ` [PATCH 05/22] util: Fall-back definitions for SECCOMP_RET_KILL_PROCESS, ETH_{MAX,MIN}_MTU Stefano Brivio
2022-01-28 18:33 ` [PATCH 06/22] seccomp: Introduce mechanism to allow per-arch syscalls Stefano Brivio
2022-01-28 18:33 ` Stefano Brivio [this message]
2022-01-28 18:33 ` [PATCH 08/22] conf, pasta: Explicitly pass CLONE_{NEWUSER,NEWNET} to setns() Stefano Brivio
2022-01-28 18:33 ` [PATCH 09/22] tcp, udp, util: Fixes for bitmap handling on big-endian, casts Stefano Brivio
2022-01-28 18:33 ` [PATCH 10/22] netlink: Fix swapped v4/v6-only flags in external interface detection Stefano Brivio
2022-01-28 18:33 ` [PATCH 11/22] pasta: Check for zero d_reclen returned by getdents64() syscall Stefano Brivio
2022-01-28 18:33 ` [PATCH 12/22] tcp: Don't round down MSS to >= 64KiB page size, but clamp it in any case Stefano Brivio
2022-01-28 18:33 ` [PATCH 13/22] seccomp: Add a number of alternate and per-arch syscalls Stefano Brivio
2022-01-28 18:33 ` [PATCH 14/22] demo/pasta: Don't wait for pasta to return to a prompt Stefano Brivio
2022-01-28 18:33 ` [PATCH 15/22] test/two_guests: Drop stray spaces after sleep directives Stefano Brivio
2022-01-28 18:33 ` [PATCH 16/22] perf/passt_udp: Lower failure throughput thresholds with big MTUs Stefano Brivio
2022-01-28 18:33 ` [PATCH 17/22] test/lib/setup: Don't rely on IFS to properly separate qemu arguments Stefano Brivio
2022-01-28 18:33 ` [PATCH 18/22] test/lib/video: Drop -preset ultrafast from ffmpeg arguments Stefano Brivio
2022-01-28 18:33 ` [PATCH 19/22] hooks/pre-push: Delete old versions, add -DGLIBC_NO_STATIC_NSS, disable legacy builds Stefano Brivio
2022-01-28 18:33 ` [PATCH 20/22] conf: Fix support for --stderr as short option (-e) Stefano Brivio
2022-01-28 18:33 ` [PATCH 21/22] README: Fix anchor for Performance section Stefano Brivio
2022-01-28 18:33 ` [PATCH 22/22] README: Fix link to IGMP/MLD proxy ticket Stefano Brivio

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20220128183357.3407606-8-sbrivio@redhat.com \
    --to=sbrivio@redhat.com \
    --cc=passt-dev@passt.top \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

  Be sure your reply has a Subject: header at the top and a blank
  line before the message body.
Code repositories for project(s) associated with this public inbox

	https://passt.top/passt

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for IMAP folder(s).