From mboxrd@z Thu Jan 1 00:00:00 1970 Received: from gandalf.ozlabs.org (mail.ozlabs.org [IPv6:2404:9400:2221:ea00::3]) by passt.top (Postfix) with ESMTPS id 858B45A0276 for ; Wed, 6 Mar 2024 06:58:48 +0100 (CET) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gibson.dropbear.id.au; s=202312; t=1709704722; bh=xiV0/PHFLauHWMFl08rNwvQZMUVzzVl50fP/TW2mR3o=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=komQ7Dl+HqpL8waQ3zNFiyQiYONpq63fVOv2LCdpd9pTR2N94qF7LTtFNR5IN3OC0 CRDsc0Gp962N5ZI56Xn7fOJMwqB1DtriPJsCBvqOm+VyZ3yvIV2YvZP3FsmWq+3IzK QrDOqMR/9bFPrGXseHOBftKAofBBuQ4huycKvcE1RUhcCqNE59cflAwf5nSKQ2DKNH aDdfg7hF6IoxU1zr2Afxc5syQx+skOSASBFI3FJ34wjIQsXN86QKMQoe+e7icBsDEO IEfZvg2x0BbsRPxL7LeFgSR3ReixncYQ+tGYiYTI6ccVsiH8lfcmPlW1TPfUUn8X2w CPKsbeknYzx7A== Received: by gandalf.ozlabs.org (Postfix, from userid 1007) id 4TqMDy1yVWz4wcJ; Wed, 6 Mar 2024 16:58:42 +1100 (AEDT) From: David Gibson To: passt-dev@passt.top, Stefano Brivio Subject: [PATCH v2 2/9] checksum: align buffers Date: Wed, 6 Mar 2024 16:58:31 +1100 Message-ID: <20240306055838.1180477-3-david@gibson.dropbear.id.au> X-Mailer: git-send-email 2.44.0 In-Reply-To: <20240306055838.1180477-1-david@gibson.dropbear.id.au> References: <20240306055838.1180477-1-david@gibson.dropbear.id.au> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit Message-ID-Hash: IJCEQHHTPN56P7B4GJSGAX7X6DVESUMF X-Message-ID-Hash: IJCEQHHTPN56P7B4GJSGAX7X6DVESUMF X-MailFrom: dgibson@gandalf.ozlabs.org X-Mailman-Rule-Misses: dmarc-mitigation; no-senders; approved; emergency; loop; banned-address; member-moderation; nonmember-moderation; administrivia; implicit-dest; max-recipients; max-size; news-moderation; no-subject; digests; suspicious-header CC: lvivier@redhat.com, David Gibson X-Mailman-Version: 3.3.8 Precedence: list List-Id: Development discussion and patches for passt Archived-At: Archived-At: List-Archive: List-Archive: List-Help: List-Owner: List-Post: List-Subscribe: List-Unsubscribe: From: Laurent Vivier If buffer is 
not aligned, use sum_16b() only on the unaligned part, and then use csum_avx2() on the remaining part. Remove the now-unneeded function csum_unaligned(). Signed-off-by: Laurent Vivier Reviewed-by: David Gibson Message-ID: <20240303135114.1023026-3-lvivier@redhat.com> Signed-off-by: David Gibson --- checksum.c | 47 ++++++++++++++++++++++++----------------------- 1 file changed, 24 insertions(+), 23 deletions(-) diff --git a/checksum.c b/checksum.c index f21c9b7a..65486b46 100644 --- a/checksum.c +++ b/checksum.c @@ -56,6 +56,8 @@ #include #include +#include "util.h" + /* Checksums are optional for UDP over IPv4, so we usually just set * them to 0. Change this to 1 to calculate real UDP over IPv4 * checksums @@ -110,20 +112,7 @@ uint16_t csum_fold(uint32_t sum) return sum; } -/** - * csum_unaligned() - Compute TCP/IP-style checksum for not 32-byte aligned data - * @buf: Input data - * @len: Input length - * @init: Initial 32-bit checksum, 0 for no pre-computed checksum - * - * Return: 16-bit IPv4-style checksum - */ -/* NOLINTNEXTLINE(clang-diagnostic-unknown-attributes) */ -__attribute__((optimize("-fno-strict-aliasing"))) /* See csum_16b() */ -uint16_t csum_unaligned(const void *buf, size_t len, uint32_t init) -{ - return (uint16_t)~csum_fold(sum_16b(buf, len) + init); -} +uint16_t csum(const void *buf, size_t len, uint32_t init); /** * csum_ip4_header() - Calculate and set IPv4 header checksum @@ -132,7 +121,7 @@ uint16_t csum_unaligned(const void *buf, size_t len, uint32_t init) void csum_ip4_header(struct iphdr *ip4h) { ip4h->check = 0; - ip4h->check = csum_unaligned(ip4h, (size_t)ip4h->ihl * 4, 0); + ip4h->check = csum(ip4h, (size_t)ip4h->ihl * 4, 0); } /** @@ -159,7 +148,7 @@ void csum_udp4(struct udphdr *udp4hr, + htons(IPPROTO_UDP); /* Add in partial checksum for the UDP header alone */ psum += sum_16b(udp4hr, sizeof(*udp4hr)); - udp4hr->check = csum_unaligned(payload, len, psum); + udp4hr->check = csum(payload, len, psum); } } @@ -178,7 +167,7 @@ void 
csum_icmp4(struct icmphdr *icmp4hr, const void *payload, size_t len) /* Partial checksum for ICMP header alone */ psum = sum_16b(icmp4hr, sizeof(*icmp4hr)); - icmp4hr->checksum = csum_unaligned(payload, len, psum); + icmp4hr->checksum = csum(payload, len, psum); } /** @@ -199,7 +188,7 @@ void csum_udp6(struct udphdr *udp6hr, udp6hr->check = 0; /* Add in partial checksum for the UDP header alone */ psum += sum_16b(udp6hr, sizeof(*udp6hr)); - udp6hr->check = csum_unaligned(payload, len, psum); + udp6hr->check = csum(payload, len, psum); } /** @@ -222,7 +211,7 @@ void csum_icmp6(struct icmp6hdr *icmp6hr, icmp6hr->icmp6_cksum = 0; /* Add in partial checksum for the ICMPv6 header alone */ psum += sum_16b(icmp6hr, sizeof(*icmp6hr)); - icmp6hr->icmp6_cksum = csum_unaligned(payload, len, psum); + icmp6hr->icmp6_cksum = csum(payload, len, psum); } #ifdef __AVX2__ @@ -397,17 +386,29 @@ less_than_128_bytes: /** * csum() - Compute TCP/IP-style checksum - * @buf: Input buffer, must be aligned to 32-byte boundary + * @buf: Input buffer * @len: Input length * @init: Initial 32-bit checksum, 0 for no pre-computed checksum * - * Return: 16-bit folded, complemented checksum sum + * Return: 16-bit folded, complemented checksum */ /* NOLINTNEXTLINE(clang-diagnostic-unknown-attributes) */ __attribute__((optimize("-fno-strict-aliasing"))) /* See csum_16b() */ uint16_t csum(const void *buf, size_t len, uint32_t init) { - return (uint16_t)~csum_fold(csum_avx2(buf, len, init)); + intptr_t align = ROUND_UP((intptr_t)buf, sizeof(__m256i)); + unsigned int pad = align - (intptr_t)buf; + + if (len < pad) + pad = len; + + if (pad) + init += sum_16b(buf, pad); + + if (len > pad) + init = csum_avx2((void *)align, len - pad, init); + + return (uint16_t)~csum_fold(init); } #else /* __AVX2__ */ @@ -424,7 +425,7 @@ uint16_t csum(const void *buf, size_t len, uint32_t init) __attribute__((optimize("-fno-strict-aliasing"))) /* See csum_16b() */ uint16_t csum(const void *buf, size_t len, uint32_t init) { 
- return csum_unaligned(buf, len, init); + return (uint16_t)~csum_fold(sum_16b(buf, len) + init); } #endif /* !__AVX2__ */ -- 2.44.0