From mboxrd@z Thu Jan 1 00:00:00 1970 From: Stefano Brivio To: passt-dev@passt.top Subject: [PATCH] passt, pasta: Run-time selection of AVX2 build Date: Mon, 28 Feb 2022 16:42:14 +0100 Message-ID: <20220228154214.2803980-6-sbrivio@redhat.com> MIME-Version: 1.0 Content-Type: multipart/mixed; boundary="===============2103072843874658287==" --===============2103072843874658287== Content-Type: text/plain; charset="utf-8" Content-Transfer-Encoding: quoted-printable Build-time selection of AVX2 flags and routines is not practical for distributions, but limiting AVX2 usage to checksum routines with specific run-time detection doesn't allow for easy performance gains from auto-vectorisation of batched packet handling routines. For x86_64, build non-AVX2 and AVX2 binaries, and implement a simple wrapper replacing the current executable with the AVX2 build if it's available, and if AVX2 is supported by the current CPU. Signed-off-by: Stefano Brivio --- Makefile | 38 ++++++++++++++++++++++++++++---------- README.md | 28 +++++++++++----------------- hooks/pre-push | 9 --------- passt.c | 3 +++ test/build/all | 10 ---------- test/demo/passt | 2 +- test/demo/pasta | 2 +- 7 files changed, 44 insertions(+), 48 deletions(-) diff --git a/Makefile b/Makefile index 031b684..8387719 100644 --- a/Makefile +++ b/Makefile @@ -14,8 +14,11 @@ ifeq ($(RLIMIT_STACK_VAL),unlimited) RLIMIT_STACK_VAL :=3D 1024 endif =20 -AUDIT_ARCH :=3D $(shell uname -m | tr [a-z] [A-Z]) -AUDIT_ARCH :=3D $(shell echo $(AUDIT_ARCH) | sed 's/^ARM.*/ARM/') +# Get 'uname -m'-like architecture description for target +TARGET_ARCH :=3D $(shell $(CC) -dumpmachine | cut -f1 -d- | tr [a-z] [A-Z]) +TARGET_ARCH :=3D $(shell echo $(TARGET_ARCH) | sed 's/POWERPC/PPC/') + +AUDIT_ARCH :=3D $(shell echo $(TARGET_ARCH) | sed 's/^ARM.*/ARM/') AUDIT_ARCH :=3D $(shell echo $(AUDIT_ARCH) | sed 's/I[456]86/I386/') AUDIT_ARCH :=3D $(shell echo $(AUDIT_ARCH) | sed 's/PPC64/PPC/') AUDIT_ARCH :=3D $(shell echo $(AUDIT_ARCH) | sed 's/PPCLE/PPC64LE/') @@ -25,7 +28,7 @@ CFLAGS +=3D -DPAGE_SIZE=3D$(shell getconf PAGE_SIZE) CFLAGS +=3D -DNETNS_RUN_DIR=3D\"/run/netns\" CFLAGS +=3D -DPASST_AUDIT_ARCH=3DAUDIT_ARCH_$(AUDIT_ARCH) CFLAGS +=3D -DRLIMIT_STACK_VAL=3D$(RLIMIT_STACK_VAL) -CFLAGS +=3D -DARCH=3D\"$(shell uname -m)\" +CFLAGS +=3D -DARCH=3D\"$(TARGET_ARCH)\" =20 # On gcc 11.2, with -O2 and -flto, tcp_hash() and siphash_20b(), if inlined, # seem to be hitting something similar to: @@ -63,10 +66,13 @@ endif =20 prefix ?=3D /usr/local =20 +ifeq ($(TARGET_ARCH),X86_64) +all: passt passt.avx2 pasta pasta.avx2 qrap +BIN :=3D passt passt.avx2 pasta pasta.avx2 qrap +else all: passt pasta qrap - -avx2: CFLAGS +=3D -Ofast -mavx2 -ftree-vectorize -funroll-loops -avx2: clean all +BIN :=3D passt pasta qrap +endif =20 static: CFLAGS +=3D -static -DGLIBC_NO_STATIC_NSS static: clean all @@ -78,6 +84,16 @@ passt: $(filter-out qrap.c,$(wildcard *.c)) \ $(filter-out qrap.h,$(wildcard *.h)) seccomp.h $(CC) $(CFLAGS) $(filter-out qrap.c,$(wildcard *.c)) -o passt =20 +passt.avx2: CFLAGS +=3D -Ofast -mavx2 -ftree-vectorize -funroll-loops +passt.avx2: $(filter-out qrap.c,$(wildcard *.c)) \ + $(filter-out qrap.h,$(wildcard *.h)) seccomp.h + $(CC) $(CFLAGS) $(filter-out qrap.c,$(wildcard *.c)) -o passt.avx2 + +passt.avx2: passt + +pasta.avx2: passt.avx2 + ln -s passt.avx2 pasta.avx2 + pasta: passt ln -s passt pasta ln -s passt.1 pasta.1 @@ -88,24 +104,26 @@ qrap: qrap.c passt.h =20 .PHONY: clean clean: - -${RM} passt *.o seccomp.h qrap pasta pasta.1 \ + -${RM} passt passt.avx2 *.o seccomp.h qrap pasta pasta.avx2 pasta.1 \ passt.tar passt.tar.gz *.deb *.rpm =20 -install: passt pasta qrap +install: $(BIN) mkdir -p $(DESTDIR)$(prefix)/bin $(DESTDIR)$(prefix)/share/man/man1 - cp -d passt pasta qrap $(DESTDIR)$(prefix)/bin + cp -d $(BIN) $(DESTDIR)$(prefix)/bin cp -d passt.1 pasta.1 qrap.1 $(DESTDIR)$(prefix)/share/man/man1 =20 uninstall: -${RM} $(DESTDIR)$(prefix)/bin/passt + -${RM} $(DESTDIR)$(prefix)/bin/passt.avx2 -${RM} $(DESTDIR)$(prefix)/bin/pasta + -${RM} $(DESTDIR)$(prefix)/bin/pasta.avx2 -${RM} $(DESTDIR)$(prefix)/bin/qrap -${RM} $(DESTDIR)$(prefix)/share/man/man1/passt.1 -${RM} $(DESTDIR)$(prefix)/share/man/man1/pasta.1 -${RM} $(DESTDIR)$(prefix)/share/man/man1/qrap.1 =20 pkgs: - tar cf passt.tar -P --xform 's//\/usr\/bin\//' passt pasta qrap + tar cf passt.tar -P --xform 's//\/usr\/bin\//' $(BIN) tar rf passt.tar -P --xform 's//\/usr\/share\/man\/man1\//' \ passt.1 pasta.1 qrap.1 gzip passt.tar diff --git a/README.md b/README.md index 14e1777..4966e15 100644 --- a/README.md +++ b/README.md @@ -220,7 +220,7 @@ speeding up local connections, and usually requiring NAT.= _pasta_: * Linux * =E2=9C=85 starting from 4.18 kernel version * =E2=9C=85 starting from 3.13 kernel version -* =F0=9F=9B=A0 build-time selection of AVX2 instructions (as much as possibl= e) +* =E2=9C=85 run-time selection of AVX2 build * =E2=8C=9A [_musl_](https://bugs.passt.top/show_bug.cgi?id=3D4) and [_uClibc-ng_](https://bugs.passt.top/show_bug.cgi?id=3D5) * =E2=8C=9A [FreeBSD](https://bugs.passt.top/show_bug.cgi?id=3D6), @@ -467,15 +467,12 @@ Test logs [here](/builds/latest/test/). cd passt make =20 - * alternatively, static builds for x86_64, with or without AVX2 instruct= ions, - as of the latest commit are also available for convenience - [here](/builds/latest/x86_64/avx2/) and - [here](/builds/latest/x86_64/). Convenience, non-official - packages for Debian (and derivatives) and RPM-based distributions are = also - available there. These binaries and packages are simply built with: + * alternatively, static builds for x86_64 as of the latest commit are al= so + available for convenience [here](/builds/latest/x86_64/). Convenience, + non-official packages for Debian (and derivatives) and RPM-based + distributions are also available there. These binaries and packages are + simply built with: =20 - CFLAGS=3D"-static" make avx2 - make pkgs make static make pkgs =20 @@ -530,15 +527,12 @@ Test logs [here](/builds/latest/test/). cd passt make =20 - * alternatively, static builds for x86_64, with or without AVX2 instruct= ions, - as of the latest commit are also available for convenience - [here](/builds/latest/x86_64/avx2/) and - [here](/builds/latest/x86_64/). Convenience, non-official - packages for Debian (and derivatives) and RPM-based distributions are = also - available there. These binaries and packages are simply built with: + * alternatively, static builds for x86_64 as of the latest commit are al= so + available for convenience [here](/builds/latest/x86_64/). Convenience, + non-official packages for Debian (and derivatives) and RPM-based + distributions are also available there. These binaries and packages are + simply built with: =20 - CFLAGS=3D"-static" make avx2 - make pkgs make static make pkgs =20 diff --git a/hooks/pre-push b/hooks/pre-push index a3f67f4..6a9b712 100755 --- a/hooks/pre-push +++ b/hooks/pre-push @@ -58,15 +58,6 @@ ssh "${USER_HOST}" "rm -f ${BIN}/*.deb" ssh "${USER_HOST}" "rm -f ${BIN}/*.rpm" scp *.deb *.rpm "${USER_HOST}:${BIN}/" =20 -CFLAGS=3D"-static -DGLIBC_NO_STATIC_NSS" make avx2 -ssh "${USER_HOST}" "mkdir -p ${BIN}/avx2" -scp passt pasta qrap passt.1 pasta.1 qrap.1 "${USER_HOST}:${BIN}/avx2/" - -make pkgs -ssh "${USER_HOST}" "rm -f ${BIN}/avx2/*.deb" -ssh "${USER_HOST}" "rm -f ${BIN}/avx2/*.rpm" -scp *.deb *.rpm "${USER_HOST}:${BIN}/avx2/" - ssh "${USER_HOST}" "mv ${LATEST} ${AWAY}" ssh "${USER_HOST}" "mv ${TEMP} ${LATEST}" ssh "${USER_HOST}" "rm -rf ${AWAY}" diff --git a/passt.c b/passt.c index e7dd108..40d3e57 100644 --- a/passt.c +++ b/passt.c @@ -69,6 +69,7 @@ #include "tap.h" #include "conf.h" #include "pasta.h" +#include "arch.h" =20 #define EPOLL_EVENTS 8 =20 @@ -313,6 +314,8 @@ int main(int argc, char **argv) struct sigaction sa; char *log_name; =20 + arch_avx2_exec(argv); + check_root(); drop_caps(); =20 diff --git a/test/build/all b/test/build/all index 9aa6c61..6043793 100644 --- a/test/build/all +++ b/test/build/all @@ -40,13 +40,3 @@ host CFLAGS=3D"-Werror" make check [ -f passt ] check [ -h pasta ] check [ -f qrap ] - -test Build AVX2 -host make clean -check ! [ -e passt ] -check ! [ -e pasta ] -check ! [ -e qrap ] -host CFLAGS=3D"-Werror" make avx2 -check [ -f passt ] -check [ -h pasta ] -check [ -f qrap ] diff --git a/test/demo/passt b/test/demo/passt index 76aac86..8838363 100644 --- a/test/demo/passt +++ b/test/demo/passt @@ -29,7 +29,7 @@ sleep 1 say and build it. sleep 1 host cd passt -host make avx2 +host make sleep 1 =20 nl diff --git a/test/demo/pasta b/test/demo/pasta index b2dd327..74fca85 100644 --- a/test/demo/pasta +++ b/test/demo/pasta @@ -169,7 +169,7 @@ say more in the "Performance" section below. sleep 3 ns exit passt exit -passt CFLAGS=3D"-g" make avx2 +passt CFLAGS=3D"-g" make sleep 2 passtb perf record -g ./pasta sleep 2 --=20 2.34.1 --===============2103072843874658287==--