public inbox for passt-dev@passt.top
 help / color / mirror / code / Atom feed
From: David Gibson <david@gibson.dropbear.id.au>
To: Laurent Vivier <lvivier@redhat.com>
Cc: passt-dev@passt.top
Subject: Re: [PATCH 2/7] tcp: use iov_tail to access headers in tcp_fill_headers()
Date: Tue, 24 Mar 2026 14:58:39 +1100	[thread overview]
Message-ID: <acIL78P9hW1ne0iR@zatzit> (raw)
In-Reply-To: <20260323165259.1253482-3-lvivier@redhat.com>

[-- Attachment #1: Type: text/plain, Size: 9697 bytes --]

On Mon, Mar 23, 2026 at 05:52:54PM +0100, Laurent Vivier wrote:
> Instead of receiving individual pointers to each protocol header (eh,
> ip4h, ip6h, th), have tcp_fill_headers() take an iov_tail starting at
> the Ethernet header and walk through it using with_header() and
> IOV_DROP_HEADER() to access each header in turn.
> 
> Replace the ip4h/ip6h NULL-pointer convention with a bool ipv4
> parameter, and move Ethernet header filling (MAC address and ethertype)
> into tcp_fill_headers() as well, since the function now owns the full
> header chain.
> 
> This simplifies callers, which no longer need to extract and pass
> individual header pointers.
> 
> Signed-off-by: Laurent Vivier <lvivier@redhat.com>
> ---
>  tcp.c          | 106 +++++++++++++++++++++++++++----------------------
>  tcp_buf.c      |  16 ++------
>  tcp_internal.h |   4 +-
>  tcp_vu.c       |  12 +++---
>  4 files changed, 70 insertions(+), 68 deletions(-)
> 
> diff --git a/tcp.c b/tcp.c
> index 158a5be0327e..058792d5b184 100644
> --- a/tcp.c
> +++ b/tcp.c
> @@ -938,11 +938,8 @@ static void tcp_fill_header(struct tcphdr *th,
>   * tcp_fill_headers() - Fill 802.3, IP, TCP headers
>   * @c:			Execution context
>   * @conn:		Connection pointer
> - * @eh:		Pointer to Ethernet header
> - * @ip4h:		Pointer to IPv4 header, or NULL
> - * @ip6h:		Pointer to IPv6 header, or NULL
> - * @th:			Pointer to TCP header
> - * @payload:		TCP payload
> + * @ipv4:		True for IPv4, false for IPv6
> + * @payload:		IOV tail starting at the Ethernet header

As with UDP, I'm not sure I like removing the separate parameters.  A
little more so, since this doesn't (any more) have the ugly
udp_payload_t equivalent.  At minimum, @payload is no longer a good
name.

>   * @ip4_check:		IPv4 checksum, if already known
>   * @seq:		Sequence number for this segment
>   * @no_tcp_csum:	Do not set TCP checksum
> @@ -950,74 +947,89 @@ static void tcp_fill_header(struct tcphdr *th,
>   * Return: frame length (including L2 headers)
>   */
>  size_t tcp_fill_headers(const struct ctx *c, struct tcp_tap_conn *conn,
> -			struct ethhdr *eh,
> -			struct iphdr *ip4h, struct ipv6hdr *ip6h,
> -			struct tcphdr *th, struct iov_tail *payload,
> +			bool ipv4, const struct iov_tail *payload,
>  			int ip4_check, uint32_t seq, bool no_tcp_csum)
>  {
>  	const struct flowside *tapside = TAPFLOW(conn);
> -	size_t l4len = iov_tail_size(payload) + sizeof(*th);
> +	struct iov_tail current = *payload;
>  	uint8_t *omac = conn->f.tap_omac;
> -	size_t l3len = l4len;
> +	size_t l3len, l4len;
>  	uint32_t psum = 0;
>  
> -	if (ip4h) {
> +	with_header(struct ethhdr, eh, &current) {
> +		if (ipv4)
> +			eh->h_proto = htons_constant(ETH_P_IP);
> +		else
> +			eh->h_proto = htons_constant(ETH_P_IPV6);
> +
> +		/* Find if neighbour table has a recorded MAC address */
> +		if (MAC_IS_UNDEF(omac))
> +			fwd_neigh_mac_get(c, &tapside->oaddr, omac);
> +		eth_update_mac(eh, NULL, omac);
> +	}
> +	IOV_DROP_HEADER(&current, struct ethhdr);
> +
> +	l3len = iov_tail_size(&current);
> +
> +	if (ipv4) {
>  		const struct in_addr *src4 = inany_v4(&tapside->oaddr);
>  		const struct in_addr *dst4 = inany_v4(&tapside->eaddr);
>  
>  		assert(src4 && dst4);
>  
> -		l3len += + sizeof(*ip4h);
> +		l4len = l3len - sizeof(struct iphdr);
>  
> -		ip4h->tot_len = htons(l3len);
> -		ip4h->saddr = src4->s_addr;
> -		ip4h->daddr = dst4->s_addr;
> +		with_header(struct iphdr, ip4h, &current) {
> +			ip4h->tot_len = htons(l3len);
> +			ip4h->saddr = src4->s_addr;
> +			ip4h->daddr = dst4->s_addr;
>  
> -		if (ip4_check != -1)
> -			ip4h->check = ip4_check;
> -		else
> -			ip4h->check = csum_ip4_header(l3len, IPPROTO_TCP,
> -						      *src4, *dst4);
> +			if (ip4_check != -1)
> +				ip4h->check = ip4_check;
> +			else
> +				ip4h->check = csum_ip4_header(l3len,
> +							      IPPROTO_TCP,
> +							      *src4, *dst4);
> +		}
> +		IOV_DROP_HEADER(&current, struct iphdr);
>  
>  		if (!no_tcp_csum) {
>  			psum = proto_ipv4_header_psum(l4len, IPPROTO_TCP,
>  						      *src4, *dst4);
>  		}
> -		eh->h_proto = htons_constant(ETH_P_IP);
> -	}
> -
> -	if (ip6h) {
> -		l3len += sizeof(*ip6h);
> +	} else {
> +		l4len = l3len - sizeof(struct ipv6hdr);
>  
> -		ip6h->payload_len = htons(l4len);
> -		ip6h->saddr = tapside->oaddr.a6;
> -		ip6h->daddr = tapside->eaddr.a6;
> +		with_header(struct ipv6hdr, ip6h, &current) {
> +			ip6h->payload_len = htons(l4len);
> +			ip6h->saddr = tapside->oaddr.a6;
> +			ip6h->daddr = tapside->eaddr.a6;
>  
> -		ip6h->hop_limit = 255;
> -		ip6h->version = 6;
> -		ip6h->nexthdr = IPPROTO_TCP;
> +			ip6h->hop_limit = 255;
> +			ip6h->version = 6;
> +			ip6h->nexthdr = IPPROTO_TCP;
>  
> -		ip6_set_flow_lbl(ip6h, conn->sock);
> +			ip6_set_flow_lbl(ip6h, conn->sock);
>  
> -		if (!no_tcp_csum) {
> -			psum = proto_ipv6_header_psum(l4len, IPPROTO_TCP,
> -						      &ip6h->saddr,
> -						      &ip6h->daddr);
> +			if (!no_tcp_csum) {
> +				psum = proto_ipv6_header_psum(l4len,
> +							      IPPROTO_TCP,
> +							      &ip6h->saddr,
> +							      &ip6h->daddr);
> +			}
>  		}
> -		eh->h_proto = htons_constant(ETH_P_IPV6);
> +		IOV_DROP_HEADER(&current, struct ipv6hdr);
>  	}
>  
> -	/* Find if neighbour table has a recorded MAC address */
> -	if (MAC_IS_UNDEF(omac))
> -		fwd_neigh_mac_get(c, &tapside->oaddr, omac);
> -	eth_update_mac(eh, NULL, omac);
> -
> -	tcp_fill_header(th, conn, seq);
> -
> -	if (no_tcp_csum)
> +	with_header(struct tcphdr, th, &current) {
> +		tcp_fill_header(th, conn, seq);
>  		th->check = 0;
> -	else
> -		tcp_update_csum(psum, th, payload);
> +	}
> +
> +	if (!no_tcp_csum) {
> +		with_header(struct tcphdr, th, &current)
> +			th->check = csum_iov_tail(&current, psum);
> +	}
>  
>  	return MAX(l3len + sizeof(struct ethhdr), ETH_ZLEN);
>  }
> diff --git a/tcp_buf.c b/tcp_buf.c
> index bc0f58dd7a5e..891043c96dcb 100644
> --- a/tcp_buf.c
> +++ b/tcp_buf.c
> @@ -175,23 +175,15 @@ static void tcp_l2_buf_fill_headers(const struct ctx *c,
>  				    struct iovec *iov, int check,
>  				    uint32_t seq, bool no_tcp_csum)
>  {
> -	struct iov_tail tail = IOV_TAIL(&iov[TCP_IOV_PAYLOAD], 1, 0);
> -	struct tcphdr th_storage, *th = IOV_REMOVE_HEADER(&tail, th_storage);
> +	struct iov_tail tail = IOV_TAIL(&iov[TCP_IOV_ETH],
> +					TCP_IOV_PAYLOAD + 1 - TCP_IOV_ETH, 0);
>  	struct tap_hdr *taph = iov[TCP_IOV_TAP].iov_base;
>  	const struct flowside *tapside = TAPFLOW(conn);
> -	const struct in_addr *a4 = inany_v4(&tapside->oaddr);
> -	struct ethhdr *eh = iov[TCP_IOV_ETH].iov_base;
> -	struct ipv6hdr *ip6h = NULL;
> -	struct iphdr *ip4h = NULL;
> +	bool ipv4 = inany_v4(&tapside->oaddr) != NULL;
>  	size_t l2len;
>  
> -	if (a4)
> -		ip4h = iov[TCP_IOV_IP].iov_base;
> -	else
> -		ip6h = iov[TCP_IOV_IP].iov_base;
> +	l2len = tcp_fill_headers(c, conn, ipv4, &tail, check, seq, no_tcp_csum);
>  
> -	l2len = tcp_fill_headers(c, conn, eh, ip4h, ip6h, th, &tail, check, seq,
> -				 no_tcp_csum);
>  	tap_hdr_update(taph, l2len);
>  }
>  
> diff --git a/tcp_internal.h b/tcp_internal.h
> index bb7a6629839c..136e947f6e70 100644
> --- a/tcp_internal.h
> +++ b/tcp_internal.h
> @@ -184,9 +184,7 @@ void tcp_rst_do(const struct ctx *c, struct tcp_tap_conn *conn);
>  struct tcp_info_linux;
>  
>  size_t tcp_fill_headers(const struct ctx *c, struct tcp_tap_conn *conn,
> -			struct ethhdr *eh,
> -			struct iphdr *ip4h, struct ipv6hdr *ip6h,
> -			struct tcphdr *th, struct iov_tail *payload,
> +			bool ipv4, const struct iov_tail *payload,
>  			int ip4_check, uint32_t seq, bool no_tcp_csum);
>  
>  int tcp_update_seqack_wnd(const struct ctx *c, struct tcp_tap_conn *conn,
> diff --git a/tcp_vu.c b/tcp_vu.c
> index a21ee3499aed..c6206b7a689c 100644
> --- a/tcp_vu.c
> +++ b/tcp_vu.c
> @@ -132,13 +132,12 @@ int tcp_vu_send_flag(const struct ctx *c, struct tcp_tap_conn *conn, int flags)
>  	}
>  
>  	vu_pad(&flags_iov[0], 1, 0, hdrlen + optlen);
> -	payload = IOV_TAIL(flags_elem[0].in_sg, 1, hdrlen);
>  
>  	if (flags & KEEPALIVE)
>  		seq--;
>  
> -	tcp_fill_headers(c, conn, eh, ip4h, ip6h, th, &payload,
> -			 -1, seq, !*c->pcap);
> +	payload = IOV_TAIL(flags_elem[0].in_sg, 1, VNET_HLEN);
> +	tcp_fill_headers(c, conn, CONN_V4(conn), &payload, -1, seq, !*c->pcap);
>  
>  	if (*c->pcap)
>  		pcap_iov(&flags_elem[0].in_sg[0], 1, VNET_HLEN);
> @@ -288,10 +287,10 @@ static void tcp_vu_prepare(const struct ctx *c, struct tcp_tap_conn *conn,
>  	const struct flowside *toside = TAPFLOW(conn);
>  	bool v6 = !(inany_v4(&toside->eaddr) && inany_v4(&toside->oaddr));
>  	size_t hdrlen = tcp_vu_hdrlen(v6);
> -	struct iov_tail payload = IOV_TAIL(iov, iov_cnt, hdrlen);
>  	char *base = iov[0].iov_base;
>  	struct ipv6hdr *ip6h = NULL;
>  	struct iphdr *ip4h = NULL;
> +	struct iov_tail payload;
>  	struct tcphdr *th;
>  	struct ethhdr *eh;
>  
> @@ -326,8 +325,9 @@ static void tcp_vu_prepare(const struct ctx *c, struct tcp_tap_conn *conn,
>  	th->ack = 1;
>  	th->psh = push;
>  
> -	tcp_fill_headers(c, conn, eh, ip4h, ip6h, th, &payload,
> -			 *check, conn->seq_to_tap, no_tcp_csum);
> +	payload = IOV_TAIL(iov, iov_cnt, VNET_HLEN);
> +	tcp_fill_headers(c, conn, !v6, &payload, *check, conn->seq_to_tap,
> +			 no_tcp_csum);
>  	if (ip4h)
>  		*check = ip4h->check;
>  }
> -- 
> 2.53.0
> 

-- 
David Gibson (he or they)	| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you, not the other way
				| around.
http://www.ozlabs.org/~dgibson

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

  reply	other threads:[~2026-03-24  5:24 UTC|newest]

Thread overview: 15+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-03-23 16:52 [PATCH 0/7] vhost-user,tcp: Handle multiple iovec entries per virtqueue element Laurent Vivier
2026-03-23 16:52 ` [PATCH 1/7] tcp: pass ipv4h checksum, not a pointer to the checksum Laurent Vivier
2026-03-24  3:53   ` David Gibson
2026-03-24  7:56     ` Laurent Vivier
2026-03-24 23:49       ` David Gibson
2026-03-23 16:52 ` [PATCH 2/7] tcp: use iov_tail to access headers in tcp_fill_headers() Laurent Vivier
2026-03-24  3:58   ` David Gibson [this message]
2026-03-23 16:52 ` [PATCH 3/7] tcp_vu: Use iov_tail helpers to build headers in tcp_vu_prepare() Laurent Vivier
2026-03-25  4:46   ` David Gibson
2026-03-23 16:52 ` [PATCH 4/7] tcp_vu: Support multibuffer frames in tcp_vu_sock_recv() Laurent Vivier
2026-03-25  5:06   ` David Gibson
2026-03-23 16:52 ` [PATCH 5/7] tcp: Use iov_tail to access headers in tcp_prepare_flags() Laurent Vivier
2026-03-23 16:52 ` [PATCH 6/7] iov: introduce iov_memcopy() Laurent Vivier
2026-03-23 16:52 ` [PATCH 7/7] tcp_vu: Use iov_tail helpers to build headers in tcp_vu_send_flag() Laurent Vivier
2026-03-25  5:07 ` [PATCH 0/7] vhost-user,tcp: Handle multiple iovec entries per virtqueue element David Gibson

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=acIL78P9hW1ne0iR@zatzit \
    --to=david@gibson.dropbear.id.au \
    --cc=lvivier@redhat.com \
    --cc=passt-dev@passt.top \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
Code repositories for project(s) associated with this public inbox

	https://passt.top/passt

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for IMAP folder(s).