From: Laurent Vivier <lvivier@redhat.com>
To: passt-dev@passt.top
Cc: Laurent Vivier <lvivier@redhat.com>
Subject: [PATCH v5 4/5] tcp: Update TCP checksum using an iovec array
Date: Fri, 27 Sep 2024 15:53:48 +0200 [thread overview]
Message-ID: <20240927135349.675850-5-lvivier@redhat.com> (raw)
In-Reply-To: <20240927135349.675850-1-lvivier@redhat.com>
The TCP header and payload are currently assumed to be in the same
buffer, and tcp_update_check_tcp4()/tcp_update_check_tcp6() compute
the checksum from the base address of the header using the
length of the IP payload.
In the future (for vhost-user) we need to dispatch the TCP header and
the TCP payload through several buffers. To be able to manage that, we
provide an iovec array that points to the data of the TCP frame.
We also provide an offset, so that the array can contain the TCP frame
embedded in a lower-level frame; this offset points to the TCP header
inside the iovec array.
Signed-off-by: Laurent Vivier <lvivier@redhat.com>
---
Notes:
v5:
- s/IPv6/IPv4/
- reintroduce ip6h and iph to avoid iov_size()
- check pointer alignment before casting to the type
v4:
- replace die() by err() in tcp_update_check_tcp6() too
v3:
- replace die() by err() and return
- add more information in the error message
v2:
- s/payload_offset/l4offset/
- check memory address of the checksum (alignment, iovec boundaries)
checksum.c | 1 -
iov.c | 1 -
tcp.c | 122 ++++++++++++++++++++++++++++++++++++++++++++---------
3 files changed, 102 insertions(+), 22 deletions(-)
diff --git a/checksum.c b/checksum.c
index 05d002ab0c25..cf850196cca0 100644
--- a/checksum.c
+++ b/checksum.c
@@ -503,7 +503,6 @@ uint16_t csum(const void *buf, size_t len, uint32_t init)
*
* Return: 16-bit folded, complemented checksum
*/
-/* cppcheck-suppress unusedFunction */
uint16_t csum_iov(const struct iovec *iov, size_t n, size_t offset,
uint32_t init)
{
diff --git a/iov.c b/iov.c
index 3f9e229a305f..9116dda94247 100644
--- a/iov.c
+++ b/iov.c
@@ -25,7 +25,6 @@
#include "util.h"
#include "iov.h"
-
/* iov_skip_bytes() - Skip leading bytes of an IO vector
* @iov: IO vector
* @n: Number of entries in @iov
diff --git a/tcp.c b/tcp.c
index c9472d905520..df3147a673d1 100644
--- a/tcp.c
+++ b/tcp.c
@@ -755,36 +755,106 @@ static void tcp_sock_set_bufsize(const struct ctx *c, int s)
}
/**
- * tcp_update_check_tcp4() - Update TCP checksum from stored one
+ * tcp_update_check_tcp4() - Calculate TCP checksum for IPv4
* @iph: IPv4 header
- * @bp: TCP header followed by TCP payload
+ * @iov: Pointer to the array of IO vectors
+ * @iov_cnt: Length of the array
+ * @l4offset: IPv4 payload offset in the iovec array
*/
-static void tcp_update_check_tcp4(const struct iphdr *iph,
- struct tcp_payload_t *bp)
+void tcp_update_check_tcp4(const struct iphdr *iph,
+ const struct iovec *iov, int iov_cnt,
+ size_t l4offset)
{
uint16_t l4len = ntohs(iph->tot_len) - sizeof(struct iphdr);
struct in_addr saddr = { .s_addr = iph->saddr };
struct in_addr daddr = { .s_addr = iph->daddr };
- uint32_t sum = proto_ipv4_header_psum(l4len, IPPROTO_TCP, saddr, daddr);
+ size_t check_ofs;
+ __sum16 *check;
+ int check_idx;
+ uint32_t sum;
+ uintptr_t ptr;
+
+ sum = proto_ipv4_header_psum(l4len, IPPROTO_TCP, saddr, daddr);
+
+ check_idx = iov_skip_bytes(iov, iov_cnt,
+ l4offset + offsetof(struct tcphdr, check),
+ &check_ofs);
+
+ if (check_idx >= iov_cnt) {
+ err("TCP4 buffer is too small, iov size %zd, check offset %zd",
+ iov_size(iov, iov_cnt),
+ l4offset + offsetof(struct tcphdr, check));
+ return;
+ }
- bp->th.check = 0;
- bp->th.check = csum(bp, l4len, sum);
+ if (check_ofs + sizeof(*check) > iov[check_idx].iov_len) {
+ err("TCP4 checksum field memory is not contiguous "
+ "check_ofs %zd check_idx %d iov_len %zd",
+ check_ofs, check_idx, iov[check_idx].iov_len);
+ return;
+ }
+
+ ptr = (uintptr_t)((char *)iov[check_idx].iov_base + check_ofs);
+ if (ptr & (__alignof__(*check) - 1)) {
+ err("TCP4 checksum field is not correctly aligned in memory");
+ return;
+ }
+
+ check = (__sum16 *)ptr;
+
+ *check = 0;
+ *check = csum_iov(iov, iov_cnt, l4offset, sum);
}
/**
* tcp_update_check_tcp6() - Calculate TCP checksum for IPv6
* @ip6h: IPv6 header
- * @bp: TCP header followed by TCP payload
+ * @iov: Pointer to the array of IO vectors
+ * @iov_cnt: Length of the array
+ * @l4offset: IPv6 payload offset in the iovec array
*/
-static void tcp_update_check_tcp6(const struct ipv6hdr *ip6h,
- struct tcp_payload_t *bp)
+void tcp_update_check_tcp6(const struct ipv6hdr *ip6h,
+ const struct iovec *iov, int iov_cnt,
+ size_t l4offset)
{
uint16_t l4len = ntohs(ip6h->payload_len);
- uint32_t sum = proto_ipv6_header_psum(l4len, IPPROTO_TCP,
- &ip6h->saddr, &ip6h->daddr);
+ size_t check_ofs;
+ __sum16 *check;
+ int check_idx;
+ uint32_t sum;
+ uintptr_t ptr;
+
+ sum = proto_ipv6_header_psum(l4len, IPPROTO_TCP, &ip6h->saddr,
+ &ip6h->daddr);
+
+ check_idx = iov_skip_bytes(iov, iov_cnt,
+ l4offset + offsetof(struct tcphdr, check),
+ &check_ofs);
+
+ if (check_idx >= iov_cnt) {
+ err("TCP6 buffer is too small, iov size %zd, check offset %zd",
+ iov_size(iov, iov_cnt),
+ l4offset + offsetof(struct tcphdr, check));
+ return;
+ }
+
+ if (check_ofs + sizeof(*check) > iov[check_idx].iov_len) {
+ err("TCP6 checksum field memory is not contiguous "
+ "check_ofs %zd check_idx %d iov_len %zd",
+ check_ofs, check_idx, iov[check_idx].iov_len);
+ return;
+ }
+
+ ptr = (uintptr_t)((char *)iov[check_idx].iov_base + check_ofs);
+ if (ptr & (__alignof__(*check) - 1)) {
+ err("TCP6 checksum field is not correctly aligned in memory");
+ return;
+ }
- bp->th.check = 0;
- bp->th.check = csum(bp, l4len, sum);
+ check = (__sum16 *)ptr;
+
+ *check = 0;
+ *check = csum_iov(iov, iov_cnt, l4offset, sum);
}
/**
@@ -935,10 +1005,16 @@ static size_t tcp_fill_headers4(const struct tcp_tap_conn *conn,
tcp_fill_header(&bp->th, conn, seq);
- if (no_tcp_csum)
+ if (no_tcp_csum) {
bp->th.check = 0;
- else
- tcp_update_check_tcp4(iph, bp);
+ } else {
+ const struct iovec iov = {
+ .iov_base = bp,
+ .iov_len = ntohs(iph->tot_len) - sizeof(struct iphdr),
+ };
+
+ tcp_update_check_tcp4(iph, &iov, 1, 0);
+ }
tap_hdr_update(taph, l3len + sizeof(struct ethhdr));
@@ -980,10 +1056,16 @@ static size_t tcp_fill_headers6(const struct tcp_tap_conn *conn,
tcp_fill_header(&bp->th, conn, seq);
- if (no_tcp_csum)
+ if (no_tcp_csum) {
bp->th.check = 0;
- else
- tcp_update_check_tcp6(ip6h, bp);
+ } else {
+ const struct iovec iov = {
+ .iov_base = bp,
+ .iov_len = ntohs(ip6h->payload_len)
+ };
+
+ tcp_update_check_tcp6(ip6h, &iov, 1, 0);
+ }
tap_hdr_update(taph, l4len + sizeof(*ip6h) + sizeof(struct ethhdr));
--
@@ -755,36 +755,106 @@ static void tcp_sock_set_bufsize(const struct ctx *c, int s)
}
/**
- * tcp_update_check_tcp4() - Update TCP checksum from stored one
+ * tcp_update_check_tcp4() - Calculate TCP checksum for IPv4
* @iph: IPv4 header
- * @bp: TCP header followed by TCP payload
+ * @iov: Pointer to the array of IO vectors
+ * @iov_cnt: Length of the array
+ * @l4offset: IPv4 payload offset in the iovec array
*/
-static void tcp_update_check_tcp4(const struct iphdr *iph,
- struct tcp_payload_t *bp)
+void tcp_update_check_tcp4(const struct iphdr *iph,
+ const struct iovec *iov, int iov_cnt,
+ size_t l4offset)
{
uint16_t l4len = ntohs(iph->tot_len) - sizeof(struct iphdr);
struct in_addr saddr = { .s_addr = iph->saddr };
struct in_addr daddr = { .s_addr = iph->daddr };
- uint32_t sum = proto_ipv4_header_psum(l4len, IPPROTO_TCP, saddr, daddr);
+ size_t check_ofs;
+ __sum16 *check;
+ int check_idx;
+ uint32_t sum;
+ uintptr_t ptr;
+
+ sum = proto_ipv4_header_psum(l4len, IPPROTO_TCP, saddr, daddr);
+
+ check_idx = iov_skip_bytes(iov, iov_cnt,
+ l4offset + offsetof(struct tcphdr, check),
+ &check_ofs);
+
+ if (check_idx >= iov_cnt) {
+ err("TCP4 buffer is too small, iov size %zd, check offset %zd",
+ iov_size(iov, iov_cnt),
+ l4offset + offsetof(struct tcphdr, check));
+ return;
+ }
- bp->th.check = 0;
- bp->th.check = csum(bp, l4len, sum);
+ if (check_ofs + sizeof(*check) > iov[check_idx].iov_len) {
+ err("TCP4 checksum field memory is not contiguous "
+ "check_ofs %zd check_idx %d iov_len %zd",
+ check_ofs, check_idx, iov[check_idx].iov_len);
+ return;
+ }
+
+ ptr = (uintptr_t)((char *)iov[check_idx].iov_base + check_ofs);
+ if (ptr & (__alignof__(*check) - 1)) {
+ err("TCP4 checksum field is not correctly aligned in memory");
+ return;
+ }
+
+ check = (__sum16 *)ptr;
+
+ *check = 0;
+ *check = csum_iov(iov, iov_cnt, l4offset, sum);
}
/**
* tcp_update_check_tcp6() - Calculate TCP checksum for IPv6
* @ip6h: IPv6 header
- * @bp: TCP header followed by TCP payload
+ * @iov: Pointer to the array of IO vectors
+ * @iov_cnt: Length of the array
+ * @l4offset: IPv6 payload offset in the iovec array
*/
-static void tcp_update_check_tcp6(const struct ipv6hdr *ip6h,
- struct tcp_payload_t *bp)
+void tcp_update_check_tcp6(const struct ipv6hdr *ip6h,
+ const struct iovec *iov, int iov_cnt,
+ size_t l4offset)
{
uint16_t l4len = ntohs(ip6h->payload_len);
- uint32_t sum = proto_ipv6_header_psum(l4len, IPPROTO_TCP,
- &ip6h->saddr, &ip6h->daddr);
+ size_t check_ofs;
+ __sum16 *check;
+ int check_idx;
+ uint32_t sum;
+ uintptr_t ptr;
+
+ sum = proto_ipv6_header_psum(l4len, IPPROTO_TCP, &ip6h->saddr,
+ &ip6h->daddr);
+
+ check_idx = iov_skip_bytes(iov, iov_cnt,
+ l4offset + offsetof(struct tcphdr, check),
+ &check_ofs);
+
+ if (check_idx >= iov_cnt) {
+ err("TCP6 buffer is too small, iov size %zd, check offset %zd",
+ iov_size(iov, iov_cnt),
+ l4offset + offsetof(struct tcphdr, check));
+ return;
+ }
+
+ if (check_ofs + sizeof(*check) > iov[check_idx].iov_len) {
+ err("TCP6 checksum field memory is not contiguous "
+ "check_ofs %zd check_idx %d iov_len %zd",
+ check_ofs, check_idx, iov[check_idx].iov_len);
+ return;
+ }
+
+ ptr = (uintptr_t)((char *)iov[check_idx].iov_base + check_ofs);
+ if (ptr & (__alignof__(*check) - 1)) {
+ err("TCP6 checksum field is not correctly aligned in memory");
+ return;
+ }
- bp->th.check = 0;
- bp->th.check = csum(bp, l4len, sum);
+ check = (__sum16 *)ptr;
+
+ *check = 0;
+ *check = csum_iov(iov, iov_cnt, l4offset, sum);
}
/**
@@ -935,10 +1005,16 @@ static size_t tcp_fill_headers4(const struct tcp_tap_conn *conn,
tcp_fill_header(&bp->th, conn, seq);
- if (no_tcp_csum)
+ if (no_tcp_csum) {
bp->th.check = 0;
- else
- tcp_update_check_tcp4(iph, bp);
+ } else {
+ const struct iovec iov = {
+ .iov_base = bp,
+ .iov_len = ntohs(iph->tot_len) - sizeof(struct iphdr),
+ };
+
+ tcp_update_check_tcp4(iph, &iov, 1, 0);
+ }
tap_hdr_update(taph, l3len + sizeof(struct ethhdr));
@@ -980,10 +1056,16 @@ static size_t tcp_fill_headers6(const struct tcp_tap_conn *conn,
tcp_fill_header(&bp->th, conn, seq);
- if (no_tcp_csum)
+ if (no_tcp_csum) {
bp->th.check = 0;
- else
- tcp_update_check_tcp6(ip6h, bp);
+ } else {
+ const struct iovec iov = {
+ .iov_base = bp,
+ .iov_len = ntohs(ip6h->payload_len)
+ };
+
+ tcp_update_check_tcp6(ip6h, &iov, 1, 0);
+ }
tap_hdr_update(taph, l4len + sizeof(*ip6h) + sizeof(struct ethhdr));
--
2.46.0
next prev parent reply other threads:[~2024-09-27 13:54 UTC|newest]
Thread overview: 16+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-09-27 13:53 [PATCH v5 0/5] tcp: use csum_iov() in tcp_update_check_tcp[4|6]() Laurent Vivier
2024-09-27 13:53 ` [PATCH v5 1/5] tcp: Use tcp_payload_t rather than tcphdr Laurent Vivier
2024-09-27 13:53 ` [PATCH v5 2/5] pcap: Add an offset argument in pcap_iov() Laurent Vivier
2024-09-27 13:53 ` [PATCH v5 3/5] checksum: Add an offset argument in csum_iov() Laurent Vivier
2024-09-30 2:51 ` David Gibson
2024-09-27 13:53 ` Laurent Vivier [this message]
2024-09-30 2:56 ` [PATCH v5 4/5] tcp: Update TCP checksum using an iovec array David Gibson
2024-10-01 7:29 ` Stefano Brivio
2024-10-01 18:22 ` Stefano Brivio
2024-10-02 7:39 ` Laurent Vivier
2024-10-02 9:00 ` Stefano Brivio
2024-10-03 12:58 ` Laurent Vivier
2024-09-27 13:53 ` [PATCH v5 5/5] udp: Update UDP " Laurent Vivier
2024-09-30 2:58 ` David Gibson
2024-10-02 14:32 ` [PATCH v5 0/5] tcp: use csum_iov() in tcp_update_check_tcp[4|6]() Stefano Brivio
2024-10-02 14:46 ` Stefano Brivio
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20240927135349.675850-5-lvivier@redhat.com \
--to=lvivier@redhat.com \
--cc=passt-dev@passt.top \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
Code repositories for project(s) associated with this public inbox
https://passt.top/passt
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for IMAP folder(s).