From: Laurent Vivier <lvivier@redhat.com>
To: passt-dev@passt.top
Cc: Laurent Vivier <lvivier@redhat.com>,
David Gibson <david@gibson.dropbear.id.au>
Subject: [PATCH v5 2/9] checksum: align buffers
Date: Sun, 3 Mar 2024 14:51:07 +0100 [thread overview]
Message-ID: <20240303135114.1023026-3-lvivier@redhat.com> (raw)
In-Reply-To: <20240303135114.1023026-1-lvivier@redhat.com>
If the buffer is not aligned, use sum_16b() only on the unaligned
part, and then use csum_avx2() on the remaining part.
Remove the now-unneeded function csum_unaligned().
Signed-off-by: Laurent Vivier <lvivier@redhat.com>
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
---
Notes:
v4:
- rebase
v3:
- Add David's R-b
v2:
- use ROUND_UP() and sizeof(__m256i)
- fix function comment
- remove csum_unaligned() and use csum() instead
checksum.c | 47 ++++++++++++++++++++++++-----------------------
1 file changed, 24 insertions(+), 23 deletions(-)
diff --git a/checksum.c b/checksum.c
index f21c9b7a14d1..65486b4625ba 100644
--- a/checksum.c
+++ b/checksum.c
@@ -56,6 +56,8 @@
#include <linux/udp.h>
#include <linux/icmpv6.h>
+#include "util.h"
+
/* Checksums are optional for UDP over IPv4, so we usually just set
* them to 0. Change this to 1 to calculate real UDP over IPv4
* checksums
@@ -110,20 +112,7 @@ uint16_t csum_fold(uint32_t sum)
return sum;
}
-/**
- * csum_unaligned() - Compute TCP/IP-style checksum for not 32-byte aligned data
- * @buf: Input data
- * @len: Input length
- * @init: Initial 32-bit checksum, 0 for no pre-computed checksum
- *
- * Return: 16-bit IPv4-style checksum
- */
-/* NOLINTNEXTLINE(clang-diagnostic-unknown-attributes) */
-__attribute__((optimize("-fno-strict-aliasing"))) /* See csum_16b() */
-uint16_t csum_unaligned(const void *buf, size_t len, uint32_t init)
-{
- return (uint16_t)~csum_fold(sum_16b(buf, len) + init);
-}
+uint16_t csum(const void *buf, size_t len, uint32_t init);
/**
* csum_ip4_header() - Calculate and set IPv4 header checksum
@@ -132,7 +121,7 @@ uint16_t csum_unaligned(const void *buf, size_t len, uint32_t init)
void csum_ip4_header(struct iphdr *ip4h)
{
ip4h->check = 0;
- ip4h->check = csum_unaligned(ip4h, (size_t)ip4h->ihl * 4, 0);
+ ip4h->check = csum(ip4h, (size_t)ip4h->ihl * 4, 0);
}
/**
@@ -159,7 +148,7 @@ void csum_udp4(struct udphdr *udp4hr,
+ htons(IPPROTO_UDP);
/* Add in partial checksum for the UDP header alone */
psum += sum_16b(udp4hr, sizeof(*udp4hr));
- udp4hr->check = csum_unaligned(payload, len, psum);
+ udp4hr->check = csum(payload, len, psum);
}
}
@@ -178,7 +167,7 @@ void csum_icmp4(struct icmphdr *icmp4hr, const void *payload, size_t len)
/* Partial checksum for ICMP header alone */
psum = sum_16b(icmp4hr, sizeof(*icmp4hr));
- icmp4hr->checksum = csum_unaligned(payload, len, psum);
+ icmp4hr->checksum = csum(payload, len, psum);
}
/**
@@ -199,7 +188,7 @@ void csum_udp6(struct udphdr *udp6hr,
udp6hr->check = 0;
/* Add in partial checksum for the UDP header alone */
psum += sum_16b(udp6hr, sizeof(*udp6hr));
- udp6hr->check = csum_unaligned(payload, len, psum);
+ udp6hr->check = csum(payload, len, psum);
}
/**
@@ -222,7 +211,7 @@ void csum_icmp6(struct icmp6hdr *icmp6hr,
icmp6hr->icmp6_cksum = 0;
/* Add in partial checksum for the ICMPv6 header alone */
psum += sum_16b(icmp6hr, sizeof(*icmp6hr));
- icmp6hr->icmp6_cksum = csum_unaligned(payload, len, psum);
+ icmp6hr->icmp6_cksum = csum(payload, len, psum);
}
#ifdef __AVX2__
@@ -397,17 +386,29 @@ less_than_128_bytes:
/**
* csum() - Compute TCP/IP-style checksum
- * @buf: Input buffer, must be aligned to 32-byte boundary
+ * @buf: Input buffer
* @len: Input length
* @init: Initial 32-bit checksum, 0 for no pre-computed checksum
*
- * Return: 16-bit folded, complemented checksum sum
+ * Return: 16-bit folded, complemented checksum
*/
/* NOLINTNEXTLINE(clang-diagnostic-unknown-attributes) */
__attribute__((optimize("-fno-strict-aliasing"))) /* See csum_16b() */
uint16_t csum(const void *buf, size_t len, uint32_t init)
{
- return (uint16_t)~csum_fold(csum_avx2(buf, len, init));
+ intptr_t align = ROUND_UP((intptr_t)buf, sizeof(__m256i));
+ unsigned int pad = align - (intptr_t)buf;
+
+ if (len < pad)
+ pad = len;
+
+ if (pad)
+ init += sum_16b(buf, pad);
+
+ if (len > pad)
+ init = csum_avx2((void *)align, len - pad, init);
+
+ return (uint16_t)~csum_fold(init);
}
#else /* __AVX2__ */
@@ -424,7 +425,7 @@ uint16_t csum(const void *buf, size_t len, uint32_t init)
__attribute__((optimize("-fno-strict-aliasing"))) /* See csum_16b() */
uint16_t csum(const void *buf, size_t len, uint32_t init)
{
- return csum_unaligned(buf, len, init);
+ return (uint16_t)~csum_fold(sum_16b(buf, len) + init);
}
#endif /* !__AVX2__ */
--
@@ -56,6 +56,8 @@
#include <linux/udp.h>
#include <linux/icmpv6.h>
+#include "util.h"
+
/* Checksums are optional for UDP over IPv4, so we usually just set
* them to 0. Change this to 1 to calculate real UDP over IPv4
* checksums
@@ -110,20 +112,7 @@ uint16_t csum_fold(uint32_t sum)
return sum;
}
-/**
- * csum_unaligned() - Compute TCP/IP-style checksum for not 32-byte aligned data
- * @buf: Input data
- * @len: Input length
- * @init: Initial 32-bit checksum, 0 for no pre-computed checksum
- *
- * Return: 16-bit IPv4-style checksum
- */
-/* NOLINTNEXTLINE(clang-diagnostic-unknown-attributes) */
-__attribute__((optimize("-fno-strict-aliasing"))) /* See csum_16b() */
-uint16_t csum_unaligned(const void *buf, size_t len, uint32_t init)
-{
- return (uint16_t)~csum_fold(sum_16b(buf, len) + init);
-}
+uint16_t csum(const void *buf, size_t len, uint32_t init);
/**
* csum_ip4_header() - Calculate and set IPv4 header checksum
@@ -132,7 +121,7 @@ uint16_t csum_unaligned(const void *buf, size_t len, uint32_t init)
void csum_ip4_header(struct iphdr *ip4h)
{
ip4h->check = 0;
- ip4h->check = csum_unaligned(ip4h, (size_t)ip4h->ihl * 4, 0);
+ ip4h->check = csum(ip4h, (size_t)ip4h->ihl * 4, 0);
}
/**
@@ -159,7 +148,7 @@ void csum_udp4(struct udphdr *udp4hr,
+ htons(IPPROTO_UDP);
/* Add in partial checksum for the UDP header alone */
psum += sum_16b(udp4hr, sizeof(*udp4hr));
- udp4hr->check = csum_unaligned(payload, len, psum);
+ udp4hr->check = csum(payload, len, psum);
}
}
@@ -178,7 +167,7 @@ void csum_icmp4(struct icmphdr *icmp4hr, const void *payload, size_t len)
/* Partial checksum for ICMP header alone */
psum = sum_16b(icmp4hr, sizeof(*icmp4hr));
- icmp4hr->checksum = csum_unaligned(payload, len, psum);
+ icmp4hr->checksum = csum(payload, len, psum);
}
/**
@@ -199,7 +188,7 @@ void csum_udp6(struct udphdr *udp6hr,
udp6hr->check = 0;
/* Add in partial checksum for the UDP header alone */
psum += sum_16b(udp6hr, sizeof(*udp6hr));
- udp6hr->check = csum_unaligned(payload, len, psum);
+ udp6hr->check = csum(payload, len, psum);
}
/**
@@ -222,7 +211,7 @@ void csum_icmp6(struct icmp6hdr *icmp6hr,
icmp6hr->icmp6_cksum = 0;
/* Add in partial checksum for the ICMPv6 header alone */
psum += sum_16b(icmp6hr, sizeof(*icmp6hr));
- icmp6hr->icmp6_cksum = csum_unaligned(payload, len, psum);
+ icmp6hr->icmp6_cksum = csum(payload, len, psum);
}
#ifdef __AVX2__
@@ -397,17 +386,29 @@ less_than_128_bytes:
/**
* csum() - Compute TCP/IP-style checksum
- * @buf: Input buffer, must be aligned to 32-byte boundary
+ * @buf: Input buffer
* @len: Input length
* @init: Initial 32-bit checksum, 0 for no pre-computed checksum
*
- * Return: 16-bit folded, complemented checksum sum
+ * Return: 16-bit folded, complemented checksum
*/
/* NOLINTNEXTLINE(clang-diagnostic-unknown-attributes) */
__attribute__((optimize("-fno-strict-aliasing"))) /* See csum_16b() */
uint16_t csum(const void *buf, size_t len, uint32_t init)
{
- return (uint16_t)~csum_fold(csum_avx2(buf, len, init));
+ intptr_t align = ROUND_UP((intptr_t)buf, sizeof(__m256i));
+ unsigned int pad = align - (intptr_t)buf;
+
+ if (len < pad)
+ pad = len;
+
+ if (pad)
+ init += sum_16b(buf, pad);
+
+ if (len > pad)
+ init = csum_avx2((void *)align, len - pad, init);
+
+ return (uint16_t)~csum_fold(init);
}
#else /* __AVX2__ */
@@ -424,7 +425,7 @@ uint16_t csum(const void *buf, size_t len, uint32_t init)
__attribute__((optimize("-fno-strict-aliasing"))) /* See csum_16b() */
uint16_t csum(const void *buf, size_t len, uint32_t init)
{
- return csum_unaligned(buf, len, init);
+ return (uint16_t)~csum_fold(sum_16b(buf, len) + init);
}
#endif /* !__AVX2__ */
--
2.42.0
next prev parent reply other threads:[~2024-03-03 13:51 UTC|newest]
Thread overview: 15+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-03-03 13:51 [PATCH v5 0/9] Add vhost-user support to passt (part 1) Laurent Vivier
2024-03-03 13:51 ` [PATCH v5 1/9] pcap: add pcap_iov() Laurent Vivier
2024-03-03 13:51 ` Laurent Vivier [this message]
2024-03-03 13:51 ` [PATCH v5 3/9] checksum: add csum_iov() Laurent Vivier
2024-03-03 13:51 ` [PATCH v5 4/9] util: move IP stuff from util.[ch] to ip.[ch] Laurent Vivier
2024-03-03 13:51 ` [PATCH v5 5/9] udp: little cleanup in udp_update_hdrX() to prepare future changes Laurent Vivier
2024-03-04 0:46 ` David Gibson
2024-03-03 13:51 ` [PATCH v5 6/9] checksum: use csum_ip4_header() in udp.c and tcp.c Laurent Vivier
2024-03-03 13:51 ` [PATCH v5 7/9] checksum: introduce functions to compute the header part checksum for TCP/UDP Laurent Vivier
2024-03-04 0:52 ` David Gibson
2024-03-03 13:51 ` [PATCH v5 8/9] tap: make tap_update_mac() generic Laurent Vivier
2024-03-03 13:51 ` [PATCH v5 9/9] tcp: Introduce ipv4_fill_headers()/ipv6_fill_headers() Laurent Vivier
2024-03-04 0:54 ` David Gibson
2024-03-05 22:12 ` [PATCH v5 0/9] Add vhost-user support to passt (part 1) Stefano Brivio
2024-03-06 3:22 ` David Gibson
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20240303135114.1023026-3-lvivier@redhat.com \
--to=lvivier@redhat.com \
--cc=david@gibson.dropbear.id.au \
--cc=passt-dev@passt.top \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
Code repositories for project(s) associated with this public inbox
https://passt.top/passt
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for IMAP folder(s).