[PATCH v3 3/9] checksum: align buffers

public inbox for passt-dev@passt.top
 help / color / mirror / code / Atom feed

From: Laurent Vivier <lvivier@redhat.com>
To: passt-dev@passt.top
Cc: Laurent Vivier <lvivier@redhat.com>,
	David Gibson <david@gibson.dropbear.id.au>
Subject: [PATCH v3 3/9] checksum: align buffers
Date: Sat, 17 Feb 2024 16:07:19 +0100	[thread overview]
Message-ID: <20240217150725.661467-4-lvivier@redhat.com> (raw)
In-Reply-To: <20240217150725.661467-1-lvivier@redhat.com>

If buffer is not aligned use sum_16b() only on the not aligned
part, and then use csum_avx2() on the remaining part

Remove unneeded now function csum_unaligned().

Signed-off-by: Laurent Vivier <lvivier@redhat.com>
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
---

Notes:
    v3:
      - Add David's R-b
    
    v2:
      - use ROUND_UP() and sizeof(__m256i)
      - fix function comment
      - remove csum_unaligned() and use csum() instead

 checksum.c | 47 ++++++++++++++++++++++++-----------------------
 1 file changed, 24 insertions(+), 23 deletions(-)

diff --git a/checksum.c b/checksum.c
index f21c9b7a14d1..65486b4625ba 100644
--- a/checksum.c
+++ b/checksum.c
@@ -56,6 +56,8 @@
 #include <linux/udp.h>
 #include <linux/icmpv6.h>
 
+#include "util.h"
+
 /* Checksums are optional for UDP over IPv4, so we usually just set
  * them to 0.  Change this to 1 to calculate real UDP over IPv4
  * checksums
@@ -110,20 +112,7 @@ uint16_t csum_fold(uint32_t sum)
 	return sum;
 }
 
-/**
- * csum_unaligned() - Compute TCP/IP-style checksum for not 32-byte aligned data
- * @buf:	Input data
- * @len:	Input length
- * @init:	Initial 32-bit checksum, 0 for no pre-computed checksum
- *
- * Return: 16-bit IPv4-style checksum
- */
-/* NOLINTNEXTLINE(clang-diagnostic-unknown-attributes) */
-__attribute__((optimize("-fno-strict-aliasing")))	/* See csum_16b() */
-uint16_t csum_unaligned(const void *buf, size_t len, uint32_t init)
-{
-	return (uint16_t)~csum_fold(sum_16b(buf, len) + init);
-}
+uint16_t csum(const void *buf, size_t len, uint32_t init);
 
 /**
  * csum_ip4_header() - Calculate and set IPv4 header checksum
@@ -132,7 +121,7 @@ uint16_t csum_unaligned(const void *buf, size_t len, uint32_t init)
 void csum_ip4_header(struct iphdr *ip4h)
 {
 	ip4h->check = 0;
-	ip4h->check = csum_unaligned(ip4h, (size_t)ip4h->ihl * 4, 0);
+	ip4h->check = csum(ip4h, (size_t)ip4h->ihl * 4, 0);
 }
 
 /**
@@ -159,7 +148,7 @@ void csum_udp4(struct udphdr *udp4hr,
 			+ htons(IPPROTO_UDP);
 		/* Add in partial checksum for the UDP header alone */
 		psum += sum_16b(udp4hr, sizeof(*udp4hr));
-		udp4hr->check = csum_unaligned(payload, len, psum);
+		udp4hr->check = csum(payload, len, psum);
 	}
 }
 
@@ -178,7 +167,7 @@ void csum_icmp4(struct icmphdr *icmp4hr, const void *payload, size_t len)
 	/* Partial checksum for ICMP header alone */
 	psum = sum_16b(icmp4hr, sizeof(*icmp4hr));
 
-	icmp4hr->checksum = csum_unaligned(payload, len, psum);
+	icmp4hr->checksum = csum(payload, len, psum);
 }
 
 /**
@@ -199,7 +188,7 @@ void csum_udp6(struct udphdr *udp6hr,
 	udp6hr->check = 0;
 	/* Add in partial checksum for the UDP header alone */
 	psum += sum_16b(udp6hr, sizeof(*udp6hr));
-	udp6hr->check = csum_unaligned(payload, len, psum);
+	udp6hr->check = csum(payload, len, psum);
 }
 
 /**
@@ -222,7 +211,7 @@ void csum_icmp6(struct icmp6hdr *icmp6hr,
 	icmp6hr->icmp6_cksum = 0;
 	/* Add in partial checksum for the ICMPv6 header alone */
 	psum += sum_16b(icmp6hr, sizeof(*icmp6hr));
-	icmp6hr->icmp6_cksum = csum_unaligned(payload, len, psum);
+	icmp6hr->icmp6_cksum = csum(payload, len, psum);
 }
 
 #ifdef __AVX2__
@@ -397,17 +386,29 @@ less_than_128_bytes:
 
 /**
  * csum() - Compute TCP/IP-style checksum
- * @buf:	Input buffer, must be aligned to 32-byte boundary
+ * @buf:	Input buffer
  * @len:	Input length
  * @init:	Initial 32-bit checksum, 0 for no pre-computed checksum
  *
- * Return: 16-bit folded, complemented checksum sum
+ * Return: 16-bit folded, complemented checksum
  */
 /* NOLINTNEXTLINE(clang-diagnostic-unknown-attributes) */
 __attribute__((optimize("-fno-strict-aliasing")))	/* See csum_16b() */
 uint16_t csum(const void *buf, size_t len, uint32_t init)
 {
-	return (uint16_t)~csum_fold(csum_avx2(buf, len, init));
+	intptr_t align = ROUND_UP((intptr_t)buf, sizeof(__m256i));
+	unsigned int pad = align - (intptr_t)buf;
+
+	if (len < pad)
+		pad = len;
+
+	if (pad)
+		init += sum_16b(buf, pad);
+
+	if (len > pad)
+		init = csum_avx2((void *)align, len - pad, init);
+
+	return (uint16_t)~csum_fold(init);
 }
 
 #else /* __AVX2__ */
@@ -424,7 +425,7 @@ uint16_t csum(const void *buf, size_t len, uint32_t init)
 __attribute__((optimize("-fno-strict-aliasing")))	/* See csum_16b() */
 uint16_t csum(const void *buf, size_t len, uint32_t init)
 {
-	return csum_unaligned(buf, len, init);
+	return (uint16_t)~csum_fold(sum_16b(buf, len) + init);
 }
 
 #endif /* !__AVX2__ */
-- 
@@ -56,6 +56,8 @@
 #include <linux/udp.h>
 #include <linux/icmpv6.h>
 
+#include "util.h"
+
 /* Checksums are optional for UDP over IPv4, so we usually just set
  * them to 0.  Change this to 1 to calculate real UDP over IPv4
  * checksums
@@ -110,20 +112,7 @@ uint16_t csum_fold(uint32_t sum)
 	return sum;
 }
 
-/**
- * csum_unaligned() - Compute TCP/IP-style checksum for not 32-byte aligned data
- * @buf:	Input data
- * @len:	Input length
- * @init:	Initial 32-bit checksum, 0 for no pre-computed checksum
- *
- * Return: 16-bit IPv4-style checksum
- */
-/* NOLINTNEXTLINE(clang-diagnostic-unknown-attributes) */
-__attribute__((optimize("-fno-strict-aliasing")))	/* See csum_16b() */
-uint16_t csum_unaligned(const void *buf, size_t len, uint32_t init)
-{
-	return (uint16_t)~csum_fold(sum_16b(buf, len) + init);
-}
+uint16_t csum(const void *buf, size_t len, uint32_t init);
 
 /**
  * csum_ip4_header() - Calculate and set IPv4 header checksum
@@ -132,7 +121,7 @@ uint16_t csum_unaligned(const void *buf, size_t len, uint32_t init)
 void csum_ip4_header(struct iphdr *ip4h)
 {
 	ip4h->check = 0;
-	ip4h->check = csum_unaligned(ip4h, (size_t)ip4h->ihl * 4, 0);
+	ip4h->check = csum(ip4h, (size_t)ip4h->ihl * 4, 0);
 }
 
 /**
@@ -159,7 +148,7 @@ void csum_udp4(struct udphdr *udp4hr,
 			+ htons(IPPROTO_UDP);
 		/* Add in partial checksum for the UDP header alone */
 		psum += sum_16b(udp4hr, sizeof(*udp4hr));
-		udp4hr->check = csum_unaligned(payload, len, psum);
+		udp4hr->check = csum(payload, len, psum);
 	}
 }
 
@@ -178,7 +167,7 @@ void csum_icmp4(struct icmphdr *icmp4hr, const void *payload, size_t len)
 	/* Partial checksum for ICMP header alone */
 	psum = sum_16b(icmp4hr, sizeof(*icmp4hr));
 
-	icmp4hr->checksum = csum_unaligned(payload, len, psum);
+	icmp4hr->checksum = csum(payload, len, psum);
 }
 
 /**
@@ -199,7 +188,7 @@ void csum_udp6(struct udphdr *udp6hr,
 	udp6hr->check = 0;
 	/* Add in partial checksum for the UDP header alone */
 	psum += sum_16b(udp6hr, sizeof(*udp6hr));
-	udp6hr->check = csum_unaligned(payload, len, psum);
+	udp6hr->check = csum(payload, len, psum);
 }
 
 /**
@@ -222,7 +211,7 @@ void csum_icmp6(struct icmp6hdr *icmp6hr,
 	icmp6hr->icmp6_cksum = 0;
 	/* Add in partial checksum for the ICMPv6 header alone */
 	psum += sum_16b(icmp6hr, sizeof(*icmp6hr));
-	icmp6hr->icmp6_cksum = csum_unaligned(payload, len, psum);
+	icmp6hr->icmp6_cksum = csum(payload, len, psum);
 }
 
 #ifdef __AVX2__
@@ -397,17 +386,29 @@ less_than_128_bytes:
 
 /**
  * csum() - Compute TCP/IP-style checksum
- * @buf:	Input buffer, must be aligned to 32-byte boundary
+ * @buf:	Input buffer
  * @len:	Input length
  * @init:	Initial 32-bit checksum, 0 for no pre-computed checksum
  *
- * Return: 16-bit folded, complemented checksum sum
+ * Return: 16-bit folded, complemented checksum
  */
 /* NOLINTNEXTLINE(clang-diagnostic-unknown-attributes) */
 __attribute__((optimize("-fno-strict-aliasing")))	/* See csum_16b() */
 uint16_t csum(const void *buf, size_t len, uint32_t init)
 {
-	return (uint16_t)~csum_fold(csum_avx2(buf, len, init));
+	intptr_t align = ROUND_UP((intptr_t)buf, sizeof(__m256i));
+	unsigned int pad = align - (intptr_t)buf;
+
+	if (len < pad)
+		pad = len;
+
+	if (pad)
+		init += sum_16b(buf, pad);
+
+	if (len > pad)
+		init = csum_avx2((void *)align, len - pad, init);
+
+	return (uint16_t)~csum_fold(init);
 }
 
 #else /* __AVX2__ */
@@ -424,7 +425,7 @@ uint16_t csum(const void *buf, size_t len, uint32_t init)
 __attribute__((optimize("-fno-strict-aliasing")))	/* See csum_16b() */
 uint16_t csum(const void *buf, size_t len, uint32_t init)
 {
-	return csum_unaligned(buf, len, init);
+	return (uint16_t)~csum_fold(sum_16b(buf, len) + init);
 }
 
 #endif /* !__AVX2__ */
-- 
2.42.0

next prev parent reply	other threads:[~2024-02-17 15:07 UTC|newest]

Thread overview: 38+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-02-17 15:07 [PATCH v3 0/9] Add vhost-user support to passt (part 1) Laurent Vivier
2024-02-17 15:07 ` [PATCH v3 1/9] iov: add some functions to manage iovec Laurent Vivier
2024-02-19  2:45   ` David Gibson
2024-02-17 15:07 ` [PATCH v3 2/9] pcap: add pcap_iov() Laurent Vivier
2024-02-19  2:50   ` David Gibson
2024-02-17 15:07 ` Laurent Vivier [this message]
2024-02-17 15:07 ` [PATCH v3 4/9] checksum: add csum_iov() Laurent Vivier
2024-02-19  2:52   ` David Gibson
2024-02-17 15:07 ` [PATCH v3 5/9] util: move IP stuff from util.[ch] to ip.[ch] Laurent Vivier
2024-02-19  2:59   ` David Gibson
2024-02-17 15:07 ` [PATCH v3 6/9] checksum: use csum_ip4_header() in udp.c and tcp.c Laurent Vivier
2024-02-19  3:01   ` David Gibson
2024-02-29 16:24   ` Stefano Brivio
2024-02-29 23:10     ` David Gibson
2024-03-01  7:58       ` Stefano Brivio
2024-03-01 12:23         ` David Gibson
2024-03-04 17:04           ` Stefano Brivio
2024-02-17 15:07 ` [PATCH v3 7/9] checksum: introduce functions to compute the header part checksum for TCP/UDP Laurent Vivier
2024-02-19  3:08   ` David Gibson
2024-02-28 13:26     ` Laurent Vivier
2024-02-29  0:38       ` David Gibson
2024-02-29  7:05         ` Stefano Brivio
2024-02-29  8:49           ` David Gibson
2024-02-29  8:56             ` Stefano Brivio
2024-02-29 14:15               ` Stefano Brivio
2024-02-29 23:09                 ` David Gibson
2024-03-01  6:56                   ` Stefano Brivio
2024-03-04  1:54                     ` David Gibson
2024-03-04 11:00                       ` Stefano Brivio
2024-03-04 22:47                         ` Stefano Brivio
2024-03-06  5:09                           ` David Gibson
2024-03-08  0:08                             ` Stefano Brivio
2024-03-08  1:20                               ` David Gibson
2024-02-17 15:07 ` [PATCH v3 8/9] tap: make tap_update_mac() generic Laurent Vivier
2024-02-17 15:07 ` [PATCH v3 9/9] tcp: Introduce ipv4_fill_headers()/ipv6_fill_headers() Laurent Vivier
2024-02-19  3:14   ` David Gibson
2024-02-29 16:29   ` Stefano Brivio
2024-02-29 16:31 ` [PATCH v3 0/9] Add vhost-user support to passt (part 1) Stefano Brivio

find likely ancestor, descendant, or conflicting patches for this message:
dfblob:f21c9b7a14d dfblob:65486b4625b
	(help)

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20240217150725.661467-4-lvivier@redhat.com \
    --to=lvivier@redhat.com \
    --cc=david@gibson.dropbear.id.au \
    --cc=passt-dev@passt.top \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

Code repositories for project(s) associated with this public inbox

	https://passt.top/passt

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for IMAP folder(s).