From: David Gibson <david@gibson.dropbear.id.au>
To: Laurent Vivier <lvivier@redhat.com>
Cc: passt-dev@passt.top
Subject: Re: [PATCH 2/7] tcp: use iov_tail to access headers in tcp_fill_headers()
Date: Tue, 24 Mar 2026 14:58:39 +1100 [thread overview]
Message-ID: <acIL78P9hW1ne0iR@zatzit> (raw)
In-Reply-To: <20260323165259.1253482-3-lvivier@redhat.com>
[-- Attachment #1: Type: text/plain, Size: 9697 bytes --]
On Mon, Mar 23, 2026 at 05:52:54PM +0100, Laurent Vivier wrote:
> Instead of receiving individual pointers to each protocol header (eh,
> ip4h, ip6h, th), have tcp_fill_headers() take an iov_tail starting at
> the Ethernet header and walk through it using with_header() and
> IOV_DROP_HEADER() to access each header in turn.
>
> Replace the ip4h/ip6h NULL-pointer convention with a bool ipv4
> parameter, and move Ethernet header filling (MAC address and ethertype)
> into tcp_fill_headers() as well, since the function now owns the full
> header chain.
>
> This simplifies callers, which no longer need to extract and pass
> individual header pointers.
>
> Signed-off-by: Laurent Vivier <lvivier@redhat.com>
> ---
> tcp.c | 106 +++++++++++++++++++++++++++----------------------
> tcp_buf.c | 16 ++------
> tcp_internal.h | 4 +-
> tcp_vu.c | 12 +++---
> 4 files changed, 70 insertions(+), 68 deletions(-)
>
> diff --git a/tcp.c b/tcp.c
> index 158a5be0327e..058792d5b184 100644
> --- a/tcp.c
> +++ b/tcp.c
> @@ -938,11 +938,8 @@ static void tcp_fill_header(struct tcphdr *th,
> * tcp_fill_headers() - Fill 802.3, IP, TCP headers
> * @c: Execution context
> * @conn: Connection pointer
> - * @eh: Pointer to Ethernet header
> - * @ip4h: Pointer to IPv4 header, or NULL
> - * @ip6h: Pointer to IPv6 header, or NULL
> - * @th: Pointer to TCP header
> - * @payload: TCP payload
> + * @ipv4: True for IPv4, false for IPv6
> + * @payload: IOV tail starting at the Ethernet header
As with UDP, I'm not sure I like removing the separate parameters. A
little more so, since this doesn't (any more) have the ugly
udp_payload_t equivalent. At minimum @payload is no longer a good
name.
> * @ip4_check: IPv4 checksum, if already known
> * @seq: Sequence number for this segment
> * @no_tcp_csum: Do not set TCP checksum
> @@ -950,74 +947,89 @@ static void tcp_fill_header(struct tcphdr *th,
> * Return: frame length (including L2 headers)
> */
> size_t tcp_fill_headers(const struct ctx *c, struct tcp_tap_conn *conn,
> - struct ethhdr *eh,
> - struct iphdr *ip4h, struct ipv6hdr *ip6h,
> - struct tcphdr *th, struct iov_tail *payload,
> + bool ipv4, const struct iov_tail *payload,
> int ip4_check, uint32_t seq, bool no_tcp_csum)
> {
> const struct flowside *tapside = TAPFLOW(conn);
> - size_t l4len = iov_tail_size(payload) + sizeof(*th);
> + struct iov_tail current = *payload;
> uint8_t *omac = conn->f.tap_omac;
> - size_t l3len = l4len;
> + size_t l3len, l4len;
> uint32_t psum = 0;
>
> - if (ip4h) {
> + with_header(struct ethhdr, eh, &current) {
> + if (ipv4)
> + eh->h_proto = htons_constant(ETH_P_IP);
> + else
> + eh->h_proto = htons_constant(ETH_P_IPV6);
> +
> + /* Find if neighbour table has a recorded MAC address */
> + if (MAC_IS_UNDEF(omac))
> + fwd_neigh_mac_get(c, &tapside->oaddr, omac);
> + eth_update_mac(eh, NULL, omac);
> + }
> + IOV_DROP_HEADER(&current, struct ethhdr);
> +
> + l3len = iov_tail_size(&current);
> +
> + if (ipv4) {
> const struct in_addr *src4 = inany_v4(&tapside->oaddr);
> const struct in_addr *dst4 = inany_v4(&tapside->eaddr);
>
> assert(src4 && dst4);
>
> - l3len += + sizeof(*ip4h);
> + l4len = l3len - sizeof(struct iphdr);
>
> - ip4h->tot_len = htons(l3len);
> - ip4h->saddr = src4->s_addr;
> - ip4h->daddr = dst4->s_addr;
> + with_header(struct iphdr, ip4h, &current) {
> + ip4h->tot_len = htons(l3len);
> + ip4h->saddr = src4->s_addr;
> + ip4h->daddr = dst4->s_addr;
>
> - if (ip4_check != -1)
> - ip4h->check = ip4_check;
> - else
> - ip4h->check = csum_ip4_header(l3len, IPPROTO_TCP,
> - *src4, *dst4);
> + if (ip4_check != -1)
> + ip4h->check = ip4_check;
> + else
> + ip4h->check = csum_ip4_header(l3len,
> + IPPROTO_TCP,
> + *src4, *dst4);
> + }
> + IOV_DROP_HEADER(&current, struct iphdr);
>
> if (!no_tcp_csum) {
> psum = proto_ipv4_header_psum(l4len, IPPROTO_TCP,
> *src4, *dst4);
> }
> - eh->h_proto = htons_constant(ETH_P_IP);
> - }
> -
> - if (ip6h) {
> - l3len += sizeof(*ip6h);
> + } else {
> + l4len = l3len - sizeof(struct ipv6hdr);
>
> - ip6h->payload_len = htons(l4len);
> - ip6h->saddr = tapside->oaddr.a6;
> - ip6h->daddr = tapside->eaddr.a6;
> + with_header(struct ipv6hdr, ip6h, &current) {
> + ip6h->payload_len = htons(l4len);
> + ip6h->saddr = tapside->oaddr.a6;
> + ip6h->daddr = tapside->eaddr.a6;
>
> - ip6h->hop_limit = 255;
> - ip6h->version = 6;
> - ip6h->nexthdr = IPPROTO_TCP;
> + ip6h->hop_limit = 255;
> + ip6h->version = 6;
> + ip6h->nexthdr = IPPROTO_TCP;
>
> - ip6_set_flow_lbl(ip6h, conn->sock);
> + ip6_set_flow_lbl(ip6h, conn->sock);
>
> - if (!no_tcp_csum) {
> - psum = proto_ipv6_header_psum(l4len, IPPROTO_TCP,
> - &ip6h->saddr,
> - &ip6h->daddr);
> + if (!no_tcp_csum) {
> + psum = proto_ipv6_header_psum(l4len,
> + IPPROTO_TCP,
> + &ip6h->saddr,
> + &ip6h->daddr);
> + }
> }
> - eh->h_proto = htons_constant(ETH_P_IPV6);
> + IOV_DROP_HEADER(&current, struct ipv6hdr);
> }
>
> - /* Find if neighbour table has a recorded MAC address */
> - if (MAC_IS_UNDEF(omac))
> - fwd_neigh_mac_get(c, &tapside->oaddr, omac);
> - eth_update_mac(eh, NULL, omac);
> -
> - tcp_fill_header(th, conn, seq);
> -
> - if (no_tcp_csum)
> + with_header(struct tcphdr, th, &current) {
> + tcp_fill_header(th, conn, seq);
> th->check = 0;
> - else
> - tcp_update_csum(psum, th, payload);
> + }
> +
> + if (!no_tcp_csum) {
> + with_header(struct tcphdr, th, &current)
> + th->check = csum_iov_tail(&current, psum);
> + }
>
> return MAX(l3len + sizeof(struct ethhdr), ETH_ZLEN);
> }
> diff --git a/tcp_buf.c b/tcp_buf.c
> index bc0f58dd7a5e..891043c96dcb 100644
> --- a/tcp_buf.c
> +++ b/tcp_buf.c
> @@ -175,23 +175,15 @@ static void tcp_l2_buf_fill_headers(const struct ctx *c,
> struct iovec *iov, int check,
> uint32_t seq, bool no_tcp_csum)
> {
> - struct iov_tail tail = IOV_TAIL(&iov[TCP_IOV_PAYLOAD], 1, 0);
> - struct tcphdr th_storage, *th = IOV_REMOVE_HEADER(&tail, th_storage);
> + struct iov_tail tail = IOV_TAIL(&iov[TCP_IOV_ETH],
> + TCP_IOV_PAYLOAD + 1 - TCP_IOV_ETH, 0);
> struct tap_hdr *taph = iov[TCP_IOV_TAP].iov_base;
> const struct flowside *tapside = TAPFLOW(conn);
> - const struct in_addr *a4 = inany_v4(&tapside->oaddr);
> - struct ethhdr *eh = iov[TCP_IOV_ETH].iov_base;
> - struct ipv6hdr *ip6h = NULL;
> - struct iphdr *ip4h = NULL;
> + bool ipv4 = inany_v4(&tapside->oaddr) != NULL;
> size_t l2len;
>
> - if (a4)
> - ip4h = iov[TCP_IOV_IP].iov_base;
> - else
> - ip6h = iov[TCP_IOV_IP].iov_base;
> + l2len = tcp_fill_headers(c, conn, ipv4, &tail, check, seq, no_tcp_csum);
>
> - l2len = tcp_fill_headers(c, conn, eh, ip4h, ip6h, th, &tail, check, seq,
> - no_tcp_csum);
> tap_hdr_update(taph, l2len);
> }
>
> diff --git a/tcp_internal.h b/tcp_internal.h
> index bb7a6629839c..136e947f6e70 100644
> --- a/tcp_internal.h
> +++ b/tcp_internal.h
> @@ -184,9 +184,7 @@ void tcp_rst_do(const struct ctx *c, struct tcp_tap_conn *conn);
> struct tcp_info_linux;
>
> size_t tcp_fill_headers(const struct ctx *c, struct tcp_tap_conn *conn,
> - struct ethhdr *eh,
> - struct iphdr *ip4h, struct ipv6hdr *ip6h,
> - struct tcphdr *th, struct iov_tail *payload,
> + bool ipv4, const struct iov_tail *payload,
> int ip4_check, uint32_t seq, bool no_tcp_csum);
>
> int tcp_update_seqack_wnd(const struct ctx *c, struct tcp_tap_conn *conn,
> diff --git a/tcp_vu.c b/tcp_vu.c
> index a21ee3499aed..c6206b7a689c 100644
> --- a/tcp_vu.c
> +++ b/tcp_vu.c
> @@ -132,13 +132,12 @@ int tcp_vu_send_flag(const struct ctx *c, struct tcp_tap_conn *conn, int flags)
> }
>
> vu_pad(&flags_iov[0], 1, 0, hdrlen + optlen);
> - payload = IOV_TAIL(flags_elem[0].in_sg, 1, hdrlen);
>
> if (flags & KEEPALIVE)
> seq--;
>
> - tcp_fill_headers(c, conn, eh, ip4h, ip6h, th, &payload,
> - -1, seq, !*c->pcap);
> + payload = IOV_TAIL(flags_elem[0].in_sg, 1, VNET_HLEN);
> + tcp_fill_headers(c, conn, CONN_V4(conn), &payload, -1, seq, !*c->pcap);
>
> if (*c->pcap)
> pcap_iov(&flags_elem[0].in_sg[0], 1, VNET_HLEN);
> @@ -288,10 +287,10 @@ static void tcp_vu_prepare(const struct ctx *c, struct tcp_tap_conn *conn,
> const struct flowside *toside = TAPFLOW(conn);
> bool v6 = !(inany_v4(&toside->eaddr) && inany_v4(&toside->oaddr));
> size_t hdrlen = tcp_vu_hdrlen(v6);
> - struct iov_tail payload = IOV_TAIL(iov, iov_cnt, hdrlen);
> char *base = iov[0].iov_base;
> struct ipv6hdr *ip6h = NULL;
> struct iphdr *ip4h = NULL;
> + struct iov_tail payload;
> struct tcphdr *th;
> struct ethhdr *eh;
>
> @@ -326,8 +325,9 @@ static void tcp_vu_prepare(const struct ctx *c, struct tcp_tap_conn *conn,
> th->ack = 1;
> th->psh = push;
>
> - tcp_fill_headers(c, conn, eh, ip4h, ip6h, th, &payload,
> - *check, conn->seq_to_tap, no_tcp_csum);
> + payload = IOV_TAIL(iov, iov_cnt, VNET_HLEN);
> + tcp_fill_headers(c, conn, !v6, &payload, *check, conn->seq_to_tap,
> + no_tcp_csum);
> if (ip4h)
> *check = ip4h->check;
> }
> --
> 2.53.0
>
--
David Gibson (he or they) | I'll have my music baroque, and my code
david AT gibson.dropbear.id.au | minimalist, thank you, not the other way
| around.
http://www.ozlabs.org/~dgibson
[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]
next prev parent reply other threads:[~2026-03-24 5:24 UTC|newest]
Thread overview: 15+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-03-23 16:52 [PATCH 0/7] vhost-user,tcp: Handle multiple iovec entries per virtqueue element Laurent Vivier
2026-03-23 16:52 ` [PATCH 1/7] tcp: pass ipv4h checksum, not a pointer to the checksum Laurent Vivier
2026-03-24 3:53 ` David Gibson
2026-03-24 7:56 ` Laurent Vivier
2026-03-24 23:49 ` David Gibson
2026-03-23 16:52 ` [PATCH 2/7] tcp: use iov_tail to access headers in tcp_fill_headers() Laurent Vivier
2026-03-24 3:58 ` David Gibson [this message]
2026-03-23 16:52 ` [PATCH 3/7] tcp_vu: Use iov_tail helpers to build headers in tcp_vu_prepare() Laurent Vivier
2026-03-25 4:46 ` David Gibson
2026-03-23 16:52 ` [PATCH 4/7] tcp_vu: Support multibuffer frames in tcp_vu_sock_recv() Laurent Vivier
2026-03-25 5:06 ` David Gibson
2026-03-23 16:52 ` [PATCH 5/7] tcp: Use iov_tail to access headers in tcp_prepare_flags() Laurent Vivier
2026-03-23 16:52 ` [PATCH 6/7] iov: introduce iov_memcopy() Laurent Vivier
2026-03-23 16:52 ` [PATCH 7/7] tcp_vu: Use iov_tail helpers to build headers in tcp_vu_send_flag() Laurent Vivier
2026-03-25 5:07 ` [PATCH 0/7] vhost-user,tcp: Handle multiple iovec entries per virtqueue element David Gibson
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=acIL78P9hW1ne0iR@zatzit \
--to=david@gibson.dropbear.id.au \
--cc=lvivier@redhat.com \
--cc=passt-dev@passt.top \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
Code repositories for project(s) associated with this public inbox
https://passt.top/passt
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for IMAP folder(s).