On Wed, Feb 14, 2024 at 09:56:23AM +0100, Laurent Vivier wrote: > if buffer is not aligned use sum_16b() only on the not aligned Nit: s/if/If/ > part, and then use csum_avx2() on the remaining part > > Remove unneeded now function csum_unaligned(). > > Signed-off-by: Laurent Vivier Reviewed-by: David Gibson > --- > > Notes: > v2: > - use ROUND_UP() and sizeof(__m256i) > - fix function comment > - remove csum_unaligned() and use csum() instead > > checksum.c | 47 ++++++++++++++++++++++++----------------------- > 1 file changed, 24 insertions(+), 23 deletions(-) > > diff --git a/checksum.c b/checksum.c > index f21c9b7a14d1..65486b4625ba 100644 > --- a/checksum.c > +++ b/checksum.c > @@ -56,6 +56,8 @@ > #include > #include > > +#include "util.h" > + > /* Checksums are optional for UDP over IPv4, so we usually just set > * them to 0. Change this to 1 to calculate real UDP over IPv4 > * checksums > @@ -110,20 +112,7 @@ uint16_t csum_fold(uint32_t sum) > return sum; > } > > -/** > - * csum_unaligned() - Compute TCP/IP-style checksum for not 32-byte aligned data > - * @buf: Input data > - * @len: Input length > - * @init: Initial 32-bit checksum, 0 for no pre-computed checksum > - * > - * Return: 16-bit IPv4-style checksum > - */ > -/* NOLINTNEXTLINE(clang-diagnostic-unknown-attributes) */ > -__attribute__((optimize("-fno-strict-aliasing"))) /* See csum_16b() */ > -uint16_t csum_unaligned(const void *buf, size_t len, uint32_t init) > -{ > - return (uint16_t)~csum_fold(sum_16b(buf, len) + init); > -} > +uint16_t csum(const void *buf, size_t len, uint32_t init); > > /** > * csum_ip4_header() - Calculate and set IPv4 header checksum > @@ -132,7 +121,7 @@ uint16_t csum_unaligned(const void *buf, size_t len, uint32_t init) > void csum_ip4_header(struct iphdr *ip4h) > { > ip4h->check = 0; > - ip4h->check = csum_unaligned(ip4h, (size_t)ip4h->ihl * 4, 0); > + ip4h->check = csum(ip4h, (size_t)ip4h->ihl * 4, 0); > } > > /** > @@ -159,7 +148,7 @@ void csum_udp4(struct udphdr *udp4hr, > + htons(IPPROTO_UDP); > /* Add in partial checksum for the UDP header alone */ > psum += sum_16b(udp4hr, sizeof(*udp4hr)); > - udp4hr->check = csum_unaligned(payload, len, psum); > + udp4hr->check = csum(payload, len, psum); > } > } > > @@ -178,7 +167,7 @@ void csum_icmp4(struct icmphdr *icmp4hr, const void *payload, size_t len) > /* Partial checksum for ICMP header alone */ > psum = sum_16b(icmp4hr, sizeof(*icmp4hr)); > > - icmp4hr->checksum = csum_unaligned(payload, len, psum); > + icmp4hr->checksum = csum(payload, len, psum); > } > > /** > @@ -199,7 +188,7 @@ void csum_udp6(struct udphdr *udp6hr, > udp6hr->check = 0; > /* Add in partial checksum for the UDP header alone */ > psum += sum_16b(udp6hr, sizeof(*udp6hr)); > - udp6hr->check = csum_unaligned(payload, len, psum); > + udp6hr->check = csum(payload, len, psum); > } > > /** > @@ -222,7 +211,7 @@ void csum_icmp6(struct icmp6hdr *icmp6hr, > icmp6hr->icmp6_cksum = 0; > /* Add in partial checksum for the ICMPv6 header alone */ > psum += sum_16b(icmp6hr, sizeof(*icmp6hr)); > - icmp6hr->icmp6_cksum = csum_unaligned(payload, len, psum); > + icmp6hr->icmp6_cksum = csum(payload, len, psum); > } > > #ifdef __AVX2__ > @@ -397,17 +386,29 @@ less_than_128_bytes: > > /** > * csum() - Compute TCP/IP-style checksum > - * @buf: Input buffer, must be aligned to 32-byte boundary > + * @buf: Input buffer > * @len: Input length > * @init: Initial 32-bit checksum, 0 for no pre-computed checksum > * > - * Return: 16-bit folded, complemented checksum sum > + * Return: 16-bit folded, complemented checksum > */ > /* NOLINTNEXTLINE(clang-diagnostic-unknown-attributes) */ > __attribute__((optimize("-fno-strict-aliasing"))) /* See csum_16b() */ > uint16_t csum(const void *buf, size_t len, uint32_t init) > { > - return (uint16_t)~csum_fold(csum_avx2(buf, len, init)); > + intptr_t align = ROUND_UP((intptr_t)buf, sizeof(__m256i)); > + unsigned int pad = align - (intptr_t)buf; > + > + if (len < pad) > + pad = len; > + > + if (pad) > + init += sum_16b(buf, pad); > + > + if (len > pad) > + init = csum_avx2((void *)align, len - pad, init); > + > + return (uint16_t)~csum_fold(init); > } > > #else /* __AVX2__ */ > @@ -424,7 +425,7 @@ uint16_t csum(const void *buf, size_t len, uint32_t init) > __attribute__((optimize("-fno-strict-aliasing"))) /* See csum_16b() */ > uint16_t csum(const void *buf, size_t len, uint32_t init) > { > - return csum_unaligned(buf, len, init); > + return (uint16_t)~csum_fold(sum_16b(buf, len) + init); > } > > #endif /* !__AVX2__ */ -- David Gibson | I'll have my music baroque, and my code david AT gibson.dropbear.id.au | minimalist, thank you. NOT _the_ _other_ | _way_ _around_! http://www.ozlabs.org/~dgibson