On Fri, Feb 02, 2024 at 03:11:31PM +0100, Laurent Vivier wrote:
> Signed-off-by: Laurent Vivier
> ---
>  checksum.c | 39 ++++++++++++++++++++++-----------------
>  checksum.h |  1 +
>  2 files changed, 23 insertions(+), 17 deletions(-)
> 
> diff --git a/checksum.c b/checksum.c
> index c94980771c63..14b6057684d9 100644
> --- a/checksum.c
> +++ b/checksum.c
> @@ -395,17 +395,8 @@ less_than_128_bytes:
>  	return (uint32_t)sum64;
>  }
>  
> -/**
> - * csum() - Compute TCP/IP-style checksum
> - * @buf:	Input buffer, must be aligned to 32-byte boundary
> - * @len:	Input length
> - * @init:	Initial 32-bit checksum, 0 for no pre-computed checksum
> - *
> - * Return: 16-bit folded, complemented checksum sum
> - */
> -/* NOLINTNEXTLINE(clang-diagnostic-unknown-attributes) */
>  __attribute__((optimize("-fno-strict-aliasing")))	/* See csum_16b() */
> -uint16_t csum(const void *buf, size_t len, uint32_t init)
> +uint32_t csum_unfolded(const void *buf, size_t len, uint32_t init)

I'm wondering if this might be a little clearer with a feed() / final()
interface something like siphash.

>  {
>  	intptr_t align = ((intptr_t)buf + 0x1f) & ~(intptr_t)0x1f;
>  	unsigned int pad = align - (intptr_t)buf;
> @@ -419,24 +410,38 @@ uint16_t csum(const void *buf, size_t len, uint32_t init)
>  	if (len > pad)
>  		init = csum_avx2((void *)align, len - pad, init);
>  
> -	return (uint16_t)~csum_fold(init);
> +	return init;
>  }
> -
>  #else /* __AVX2__ */
>  
> +__attribute__((optimize("-fno-strict-aliasing")))	/* See csum_16b() */
> +uint32_t csum_unfolded(const void *buf, size_t len, uint32_t init)
> +{
> +	return sum_16b(buf, len) + init;
> +}
> +#endif /* !__AVX2__ */
> +
>  /**
>   * csum() - Compute TCP/IP-style checksum
> - * @buf:	Input buffer
> + * @buf:	Input buffer, must be aligned to 32-byte boundary

I thought the point of the previous patch was that this didn't have to
be 32-byte aligned any more.

>   * @len:	Input length
> - * @sum:	Initial 32-bit checksum, 0 for no pre-computed checksum
> + * @init:	Initial 32-bit checksum, 0 for no pre-computed checksum
>   *
> - * Return: 16-bit folded, complemented checksum
> + * Return: 16-bit folded, complemented checksum sum

"checksum sum"

>   */
>  /* NOLINTNEXTLINE(clang-diagnostic-unknown-attributes) */
>  __attribute__((optimize("-fno-strict-aliasing")))	/* See csum_16b() */
>  uint16_t csum(const void *buf, size_t len, uint32_t init)
>  {
> -	return csum_unaligned(buf, len, init);
> +	return (uint16_t)~csum_fold(csum_unfolded(buf, len, init));
>  }
>  
> -#endif /* !__AVX2__ */
> +uint16_t csum_iov(struct iovec *iov, unsigned int n, uint32_t init)
> +{
> +	unsigned int i;
> +
> +	for (i = 0; i < n; i++)
> +		init = csum_unfolded(iov[i].iov_base, iov[i].iov_len, init);
> +
> +	return (uint16_t)~csum_fold(init);
> +}
> diff --git a/checksum.h b/checksum.h
> index 21c0310d3804..6a20297a5826 100644
> --- a/checksum.h
> +++ b/checksum.h
> @@ -25,5 +25,6 @@ void csum_icmp6(struct icmp6hdr *icmp6hr,
>  		const struct in6_addr *saddr, const struct in6_addr *daddr,
>  		const void *payload, size_t len);
>  uint16_t csum(const void *buf, size_t len, uint32_t init);
> +uint16_t csum_iov(struct iovec *iov, unsigned int n, uint32_t init);
>  
>  #endif /* CHECKSUM_H */

-- 
David Gibson			| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you.  NOT _the_ _other_
				| _way_ _around_!
http://www.ozlabs.org/~dgibson
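
A minimal sketch of the feed()/final() idea mentioned above, assuming
hypothetical names (struct csum_ctx, csum_init(), csum_feed(), csum_final(),
csum_fold_sketch()) that are not taken from the passt tree; it only reuses
the csum_unfolded() helper introduced in the quoted patch:

/* Hypothetical sketch, not part of the patch: a siphash-style
 * feed()/final() interface on top of csum_unfolded() from the quoted
 * diff.  csum_fold_sketch() is a local stand-in for the fold step so
 * the sketch does not have to guess at checksum.c internals.
 */
#include <stddef.h>
#include <stdint.h>
#include <sys/uio.h>

/* Defined in checksum.c by the quoted patch; declared here for the sketch */
uint32_t csum_unfolded(const void *buf, size_t len, uint32_t init);

struct csum_ctx {
	uint32_t sum;		/* running, unfolded 32-bit sum */
};

/* Start a checksum, optionally seeded with a pre-computed partial sum */
static inline void csum_init(struct csum_ctx *ctx, uint32_t init)
{
	ctx->sum = init;
}

/* Feed one buffer into the running sum */
static inline void csum_feed(struct csum_ctx *ctx, const void *buf, size_t len)
{
	ctx->sum = csum_unfolded(buf, len, ctx->sum);
}

/* Fold carries of the 32-bit sum down into 16 bits */
static inline uint16_t csum_fold_sketch(uint32_t sum)
{
	while (sum >> 16)
		sum = (sum & 0xffff) + (sum >> 16);

	return (uint16_t)sum;
}

/* Fold and complement the running sum into the final 16-bit checksum */
static inline uint16_t csum_final(const struct csum_ctx *ctx)
{
	return (uint16_t)~csum_fold_sketch(ctx->sum);
}

/* csum_iov() would then reduce to a loop over csum_feed() */
static uint16_t csum_iov_sketch(const struct iovec *iov, unsigned int n,
				uint32_t init)
{
	struct csum_ctx ctx;
	unsigned int i;

	csum_init(&ctx, init);
	for (i = 0; i < n; i++)
		csum_feed(&ctx, iov[i].iov_base, iov[i].iov_len);

	return csum_final(&ctx);
}

With an interface like this, callers that checksum a pseudo-header first and
a scattered payload afterwards could keep feeding buffers without threading a
raw uint32_t through every call site; the fold and complement happen exactly
once, in csum_final().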