From: Stefano Brivio <sbrivio@redhat.com>
To: passt-dev@passt.top
Subject: [PATCH 07/22] tcp, netlink, HAS_{BYTES_ACKED,MIN_RTT,GETRANDOM} and NETLINK_GET_STRICT_CHK
Date: Fri, 28 Jan 2022 19:33:42 +0100 [thread overview]
Message-ID: <20220128183357.3407606-8-sbrivio@redhat.com> (raw)
In-Reply-To: <20220128183357.3407606-1-sbrivio@redhat.com>
[-- Attachment #1: Type: text/plain, Size: 4895 bytes --]
tcpi_bytes_acked and tcpi_min_rtt are only available on recent
kernel versions: provide fall-back paths (incurring some degree of
performance penalty).
Support for getrandom() was introduced in Linux 3.17 and glibc 2.25:
provide an alternate mechanism for that as well, reading from
/dev/random.
Also check if NETLINK_GET_STRICT_CHK is defined before using it:
it's not strictly needed, as we'll filter out irrelevant results
from netlink anyway.
Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
---
Makefile | 15 +++++++++++++++
netlink.c | 9 +++++++--
tcp.c | 36 ++++++++++++++++++++++++++++++++++++
3 files changed, 58 insertions(+), 2 deletions(-)
diff --git a/Makefile b/Makefile
index 4647210..443c39d 100644
--- a/Makefile
+++ b/Makefile
@@ -45,6 +45,21 @@ ifeq ($(shell printf "$(C)" | $(CC) -S -xc - -o - >/dev/null 2>&1; echo $$?),0)
CFLAGS += -DHAS_SND_WND
endif
+C := \#include <linux/tcp.h>\nstruct tcp_info x = { .tcpi_bytes_acked = 0 };
+ifeq ($(shell printf "$(C)" | $(CC) -S -xc - -o - >/dev/null 2>&1; echo $$?),0)
+ CFLAGS += -DHAS_BYTES_ACKED
+endif
+
+C := \#include <linux/tcp.h>\nstruct tcp_info x = { .tcpi_min_rtt = 0 };
+ifeq ($(shell printf "$(C)" | $(CC) -S -xc - -o - >/dev/null 2>&1; echo $$?),0)
+ CFLAGS += -DHAS_MIN_RTT
+endif
+
+C := \#include <sys/random.h>\nint main(){int a=getrandom(0, 0, 0);}
+ifeq ($(shell printf "$(C)" | $(CC) -S -xc - -o - >/dev/null 2>&1; echo $$?),0)
+ CFLAGS += -DHAS_GETRANDOM
+endif
+
prefix ?= /usr/local
all: passt pasta passt4netns qrap
diff --git a/netlink.c b/netlink.c
index 0948f45..3ba5f05 100644
--- a/netlink.c
+++ b/netlink.c
@@ -46,7 +46,10 @@ static int nl_seq;
static int nl_sock_init_do(void *arg)
{
struct sockaddr_nl addr = { .nl_family = AF_NETLINK, };
- int *s = &nl_sock, v = 1;
+ int *s = &nl_sock;
+#ifdef NETLINK_GET_STRICT_CHK
+ int y = 1;
+#endif
ns:
if (((*s) = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE)) < 0 ||
@@ -56,7 +59,9 @@ ns:
if (*s == -1 || !arg || s == &nl_sock_ns)
return 0;
- setsockopt(*s, SOL_NETLINK, NETLINK_GET_STRICT_CHK, &v, sizeof(v));
+#ifdef NETLINK_GET_STRICT_CHK
+ setsockopt(*s, SOL_NETLINK, NETLINK_GET_STRICT_CHK, &y, sizeof(y));
+#endif
ns_enter((struct ctx *)arg);
s = &nl_sock_ns;
diff --git a/tcp.c b/tcp.c
index 96d462f..839bf30 100644
--- a/tcp.c
+++ b/tcp.c
@@ -321,7 +321,9 @@
#include <stddef.h>
#include <string.h>
#include <sys/epoll.h>
+#ifdef HAS_GETRANDOM
#include <sys/random.h>
+#endif
#include <sys/socket.h>
#include <sys/types.h>
#include <sys/uio.h>
@@ -760,6 +762,7 @@ static int tcp_rtt_dst_low(struct tcp_tap_conn *conn)
*/
static void tcp_rtt_dst_check(struct tcp_tap_conn *conn, struct tcp_info *tinfo)
{
+#ifdef HAS_MIN_RTT
int i, hole = -1;
if (!tinfo->tcpi_min_rtt ||
@@ -777,6 +780,10 @@ static void tcp_rtt_dst_check(struct tcp_tap_conn *conn, struct tcp_info *tinfo)
if (hole == LOW_RTT_TABLE_SIZE)
hole = 0;
memcpy(low_rtt_dst + hole, &in6addr_any, sizeof(conn->a.a6));
+#else
+ (void)conn;
+ (void)tinfo;
+#endif /* HAS_MIN_RTT */
}
/**
@@ -1552,6 +1559,13 @@ static int tcp_update_seqack_wnd(struct ctx *c, struct tcp_tap_conn *conn,
struct tcp_info tinfo_new;
int s = conn->sock;
+#ifndef HAS_BYTES_ACKED
+ (void)flags;
+
+ conn->seq_ack_to_tap = conn->seq_from_tap;
+ if (SEQ_LT(conn->seq_ack_to_tap, prev_ack_to_tap))
+ conn->seq_ack_to_tap = prev_ack_to_tap;
+#else
if (conn->state > ESTABLISHED || (flags & (DUP_ACK | FORCE_ACK)) ||
conn->local || tcp_rtt_dst_low(conn) ||
conn->snd_buf < SNDBUF_SMALL) {
@@ -1569,6 +1583,7 @@ static int tcp_update_seqack_wnd(struct ctx *c, struct tcp_tap_conn *conn,
if (SEQ_LT(conn->seq_ack_to_tap, prev_ack_to_tap))
conn->seq_ack_to_tap = prev_ack_to_tap;
}
+#endif /* !HAS_BYTES_ACKED */
if (!KERNEL_REPORTS_SND_WND(c)) {
tcp_get_sndbuf(conn);
@@ -3586,9 +3601,30 @@ int tcp_sock_init(struct ctx *c, struct timespec *now)
{
struct tcp_sock_refill_arg refill_arg = { c, 0 };
int i, port;
+#ifndef HAS_GETRANDOM
+ int dev_random = open("/dev/random", O_RDONLY);
+ unsigned int random_read = 0;
+
+ while (dev_random && random_read < sizeof(c->tcp.hash_secret)) {
+ int ret = read(dev_random,
+ (uint8_t *)&c->tcp.hash_secret + random_read,
+ sizeof(c->tcp.hash_secret) - random_read);
+ if (ret == -1 && errno == EINTR)
+ continue;
+
+ if (ret <= 0)
+ break;
+
+ random_read += ret;
+ }
+ if (dev_random >= 0)
+ close(dev_random);
+ if (random_read < sizeof(c->tcp.hash_secret)) {
+#else
if (getrandom(&c->tcp.hash_secret, sizeof(c->tcp.hash_secret),
GRND_RANDOM) < 0) {
+#endif /* !HAS_GETRANDOM */
perror("TCP initial sequence getrandom");
exit(EXIT_FAILURE);
}
--
@@ -321,7 +321,9 @@
#include <stddef.h>
#include <string.h>
#include <sys/epoll.h>
+#ifdef HAS_GETRANDOM
#include <sys/random.h>
+#endif
#include <sys/socket.h>
#include <sys/types.h>
#include <sys/uio.h>
@@ -760,6 +762,7 @@ static int tcp_rtt_dst_low(struct tcp_tap_conn *conn)
*/
static void tcp_rtt_dst_check(struct tcp_tap_conn *conn, struct tcp_info *tinfo)
{
+#ifdef HAS_MIN_RTT
int i, hole = -1;
if (!tinfo->tcpi_min_rtt ||
@@ -777,6 +780,10 @@ static void tcp_rtt_dst_check(struct tcp_tap_conn *conn, struct tcp_info *tinfo)
if (hole == LOW_RTT_TABLE_SIZE)
hole = 0;
memcpy(low_rtt_dst + hole, &in6addr_any, sizeof(conn->a.a6));
+#else
+ (void)conn;
+ (void)tinfo;
+#endif /* HAS_MIN_RTT */
}
/**
@@ -1552,6 +1559,13 @@ static int tcp_update_seqack_wnd(struct ctx *c, struct tcp_tap_conn *conn,
struct tcp_info tinfo_new;
int s = conn->sock;
+#ifndef HAS_BYTES_ACKED
+ (void)flags;
+
+ conn->seq_ack_to_tap = conn->seq_from_tap;
+ if (SEQ_LT(conn->seq_ack_to_tap, prev_ack_to_tap))
+ conn->seq_ack_to_tap = prev_ack_to_tap;
+#else
if (conn->state > ESTABLISHED || (flags & (DUP_ACK | FORCE_ACK)) ||
conn->local || tcp_rtt_dst_low(conn) ||
conn->snd_buf < SNDBUF_SMALL) {
@@ -1569,6 +1583,7 @@ static int tcp_update_seqack_wnd(struct ctx *c, struct tcp_tap_conn *conn,
if (SEQ_LT(conn->seq_ack_to_tap, prev_ack_to_tap))
conn->seq_ack_to_tap = prev_ack_to_tap;
}
+#endif /* !HAS_BYTES_ACKED */
if (!KERNEL_REPORTS_SND_WND(c)) {
tcp_get_sndbuf(conn);
@@ -3586,9 +3601,30 @@ int tcp_sock_init(struct ctx *c, struct timespec *now)
{
struct tcp_sock_refill_arg refill_arg = { c, 0 };
int i, port;
+#ifndef HAS_GETRANDOM
+ int dev_random = open("/dev/random", O_RDONLY);
+ unsigned int random_read = 0;
+
+ while (dev_random && random_read < sizeof(c->tcp.hash_secret)) {
+ int ret = read(dev_random,
+ (uint8_t *)&c->tcp.hash_secret + random_read,
+ sizeof(c->tcp.hash_secret) - random_read);
+ if (ret == -1 && errno == EINTR)
+ continue;
+
+ if (ret <= 0)
+ break;
+
+ random_read += ret;
+ }
+ if (dev_random >= 0)
+ close(dev_random);
+ if (random_read < sizeof(c->tcp.hash_secret)) {
+#else
if (getrandom(&c->tcp.hash_secret, sizeof(c->tcp.hash_secret),
GRND_RANDOM) < 0) {
+#endif /* !HAS_GETRANDOM */
perror("TCP initial sequence getrandom");
exit(EXIT_FAILURE);
}
--
2.33.0
next prev parent reply other threads:[~2022-01-28 18:33 UTC|newest]
Thread overview: 23+ messages / expand[flat|nested] mbox.gz Atom feed top
2022-01-28 18:33 [PATCH 00/22] Fixes for non-x86_64, older kernels/glibc, and some more Stefano Brivio
2022-01-28 18:33 ` [PATCH 01/22] tcp: Cover all usages of tcpi_snd_wnd with HAS_SND_WND Stefano Brivio
2022-01-28 18:33 ` [PATCH 02/22] tap, tcp: Fix two comparisons with different signedness reported by gcc 7 Stefano Brivio
2022-01-28 18:33 ` [PATCH 03/22] passt: Drop <linux/ipv6.h> include, carry own ipv6hdr and opt_hdr definitions Stefano Brivio
2022-01-28 18:33 ` [PATCH 04/22] Makefile, seccomp: Fix build for i386, ppc64, ppc64le Stefano Brivio
2022-01-28 18:33 ` [PATCH 05/22] util: Fall-back definitions for SECCOMP_RET_KILL_PROCESS, ETH_{MAX,MIN}_MTU Stefano Brivio
2022-01-28 18:33 ` [PATCH 06/22] seccomp: Introduce mechanism to allow per-arch syscalls Stefano Brivio
2022-01-28 18:33 ` Stefano Brivio [this message]
2022-01-28 18:33 ` [PATCH 08/22] conf, pasta: Explicitly pass CLONE_{NEWUSER,NEWNET} to setns() Stefano Brivio
2022-01-28 18:33 ` [PATCH 09/22] tcp, udp, util: Fixes for bitmap handling on big-endian, casts Stefano Brivio
2022-01-28 18:33 ` [PATCH 10/22] netlink: Fix swapped v4/v6-only flags in external interface detection Stefano Brivio
2022-01-28 18:33 ` [PATCH 11/22] pasta: Check for zero d_reclen returned by getdents64() syscall Stefano Brivio
2022-01-28 18:33 ` [PATCH 12/22] tcp: Don't round down MSS to >= 64KiB page size, but clamp it in any case Stefano Brivio
2022-01-28 18:33 ` [PATCH 13/22] seccomp: Add a number of alternate and per-arch syscalls Stefano Brivio
2022-01-28 18:33 ` [PATCH 14/22] demo/pasta: Don't wait for pasta to return to a prompt Stefano Brivio
2022-01-28 18:33 ` [PATCH 15/22] test/two_guests: Drop stray spaces after sleep directives Stefano Brivio
2022-01-28 18:33 ` [PATCH 16/22] perf/passt_udp: Lower failure throughput thresholds with big MTUs Stefano Brivio
2022-01-28 18:33 ` [PATCH 17/22] test/lib/setup: Don't rely on IFS to properly separate qemu arguments Stefano Brivio
2022-01-28 18:33 ` [PATCH 18/22] test/lib/video: Drop -preset ultrafast from ffmpeg arguments Stefano Brivio
2022-01-28 18:33 ` [PATCH 19/22] hooks/pre-push: Delete old versions, add -DGLIBC_NO_STATIC_NSS, disable legacy builds Stefano Brivio
2022-01-28 18:33 ` [PATCH 20/22] conf: Fix support for --stderr as short option (-e) Stefano Brivio
2022-01-28 18:33 ` [PATCH 21/22] README: Fix anchor for Performance section Stefano Brivio
2022-01-28 18:33 ` [PATCH 22/22] README: Fix link to IGMP/MLD proxy ticket Stefano Brivio
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20220128183357.3407606-8-sbrivio@redhat.com \
--to=sbrivio@redhat.com \
--cc=passt-dev@passt.top \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
Code repositories for project(s) associated with this public inbox
https://passt.top/passt
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for IMAP folder(s).