public inbox for passt-dev@passt.top
 help / color / mirror / code / Atom feed
From: Stefano Brivio <sbrivio@redhat.com>
To: Laurent Vivier <lvivier@redhat.com>
Cc: passt-dev@passt.top
Subject: Re: [PATCH 14/24] udp: move udpX_l2_buf_t and udpX_l2_mh_sock out of udp_update_hdrX()
Date: Mon, 12 Feb 2024 00:16:06 +0100	[thread overview]
Message-ID: <20240212001606.0ddf5a36@elisabeth> (raw)
In-Reply-To: <20240202141151.3762941-15-lvivier@redhat.com>

On Fri,  2 Feb 2024 15:11:41 +0100
Laurent Vivier <lvivier@redhat.com> wrote:

> Signed-off-by: Laurent Vivier <lvivier@redhat.com>
> ---
>  udp.c | 126 ++++++++++++++++++++++++++++++++++------------------------
>  1 file changed, 73 insertions(+), 53 deletions(-)
> 
> diff --git a/udp.c b/udp.c
> index db635742319b..77168fb0a2af 100644
> --- a/udp.c
> +++ b/udp.c
> @@ -562,47 +562,48 @@ static void udp_splice_sendfrom(const struct ctx *c, unsigned start, unsigned n,
>   *
>   * Return: size of tap frame with headers
>   */
> -static size_t udp_update_hdr4(const struct ctx *c, int n, in_port_t dstport,
> -			      const struct timespec *now)
> +static size_t udp_update_hdr4(const struct ctx *c, struct iphdr *iph,
> +			      size_t data_len, struct sockaddr_in *s_in,
> +			      in_port_t dstport, const struct timespec *now)

Function comment should be updated to reflect the new set of parameters.

>  {
> -	struct udp4_l2_buf_t *b = &udp4_l2_buf[n];
> +	struct udphdr *uh = (struct udphdr *)(iph + 1);
>  	in_port_t src_port;
>  	size_t ip_len;
>  
> -	ip_len = udp4_l2_mh_sock[n].msg_len + sizeof(b->iph) + sizeof(b->uh);
> +	ip_len = data_len + sizeof(struct iphdr) + sizeof(struct udphdr);

ip_len takes into account the size of iph and uh, both local, so for
consistency with the rest of the codebase, + sizeof(*iph) + sizeof(*uh).

>  
> -	b->iph.tot_len = htons(ip_len);
> -	b->iph.daddr = c->ip4.addr_seen.s_addr;
> +	iph->tot_len = htons(ip_len);
> +	iph->daddr = c->ip4.addr_seen.s_addr;
>  
> -	src_port = ntohs(b->s_in.sin_port);
> +	src_port = ntohs(s_in->sin_port);
>  
>  	if (!IN4_IS_ADDR_UNSPECIFIED(&c->ip4.dns_match) &&
> -	    IN4_ARE_ADDR_EQUAL(&b->s_in.sin_addr, &c->ip4.dns_host) &&
> +	    IN4_ARE_ADDR_EQUAL(&s_in->sin_addr, &c->ip4.dns_host) &&
>  	    src_port == 53) {
> -		b->iph.saddr = c->ip4.dns_match.s_addr;
> -	} else if (IN4_IS_ADDR_LOOPBACK(&b->s_in.sin_addr) ||
> -		   IN4_IS_ADDR_UNSPECIFIED(&b->s_in.sin_addr)||
> -		   IN4_ARE_ADDR_EQUAL(&b->s_in.sin_addr, &c->ip4.addr_seen)) {
> -		b->iph.saddr = c->ip4.gw.s_addr;
> +		iph->saddr = c->ip4.dns_match.s_addr;
> +	} else if (IN4_IS_ADDR_LOOPBACK(&s_in->sin_addr) ||
> +		   IN4_IS_ADDR_UNSPECIFIED(&s_in->sin_addr)||
> +		   IN4_ARE_ADDR_EQUAL(&s_in->sin_addr, &c->ip4.addr_seen)) {
> +		iph->saddr = c->ip4.gw.s_addr;
>  		udp_tap_map[V4][src_port].ts = now->tv_sec;
>  		udp_tap_map[V4][src_port].flags |= PORT_LOCAL;
>  
> -		if (IN4_ARE_ADDR_EQUAL(&b->s_in.sin_addr.s_addr, &c->ip4.addr_seen))
> +		if (IN4_ARE_ADDR_EQUAL(&s_in->sin_addr.s_addr, &c->ip4.addr_seen))
>  			udp_tap_map[V4][src_port].flags &= ~PORT_LOOPBACK;
>  		else
>  			udp_tap_map[V4][src_port].flags |= PORT_LOOPBACK;
>  
>  		bitmap_set(udp_act[V4][UDP_ACT_TAP], src_port);
>  	} else {
> -		b->iph.saddr = b->s_in.sin_addr.s_addr;
> +		iph->saddr = s_in->sin_addr.s_addr;
>  	}
>  
> -	b->iph.check = ipv4_hdr_checksum(&b->iph, IPPROTO_UDP);
> -	b->uh.source = b->s_in.sin_port;
> -	b->uh.dest = htons(dstport);
> -	b->uh.len = htons(udp4_l2_mh_sock[n].msg_len + sizeof(b->uh));
> +	iph->check = ipv4_hdr_checksum(iph, IPPROTO_UDP);
> +	uh->source = s_in->sin_port;
> +	uh->dest = htons(dstport);
> +	uh->len= htons(data_len + sizeof(struct udphdr));

Missing whitespace.

>  
> -	return tap_iov_len(c, &b->taph, ip_len);
> +	return ip_len;
>  }
>  
>  /**
> @@ -614,38 +615,39 @@ static size_t udp_update_hdr4(const struct ctx *c, int n, in_port_t dstport,
>   *
>   * Return: size of tap frame with headers
>   */
> -static size_t udp_update_hdr6(const struct ctx *c, int n, in_port_t dstport,
> -			      const struct timespec *now)
> +static size_t udp_update_hdr6(const struct ctx *c, struct ipv6hdr *ip6h,
> +			      size_t data_len, struct sockaddr_in6 *s_in6,
> +			      in_port_t dstport, const struct timespec *now)

Same here, function comment should be updated.

>  {
> -	struct udp6_l2_buf_t *b = &udp6_l2_buf[n];
> +	struct udphdr *uh = (struct udphdr *)(ip6h + 1);
>  	struct in6_addr *src;
>  	in_port_t src_port;
>  	size_t ip_len;
>  
> -	src = &b->s_in6.sin6_addr;
> -	src_port = ntohs(b->s_in6.sin6_port);
> +	src = &s_in6->sin6_addr;
> +	src_port = ntohs(s_in6->sin6_port);
>  
> -	ip_len = udp6_l2_mh_sock[n].msg_len + sizeof(b->ip6h) + sizeof(b->uh);
> +	ip_len = data_len + sizeof(struct ipv6hdr) + sizeof(struct udphdr);
>  
> -	b->ip6h.payload_len = htons(udp6_l2_mh_sock[n].msg_len + sizeof(b->uh));
> +	ip6h->payload_len = htons(data_len + sizeof(struct udphdr));
>  
>  	if (IN6_IS_ADDR_LINKLOCAL(src)) {
> -		b->ip6h.daddr = c->ip6.addr_ll_seen;
> -		b->ip6h.saddr = b->s_in6.sin6_addr;
> +		ip6h->daddr = c->ip6.addr_ll_seen;
> +		ip6h->saddr = s_in6->sin6_addr;
>  	} else if (!IN6_IS_ADDR_UNSPECIFIED(&c->ip6.dns_match) &&
>  		   IN6_ARE_ADDR_EQUAL(src, &c->ip6.dns_host) &&
>  		   src_port == 53) {
> -		b->ip6h.daddr = c->ip6.addr_seen;
> -		b->ip6h.saddr = c->ip6.dns_match;
> +		ip6h->daddr = c->ip6.addr_seen;
> +		ip6h->saddr = c->ip6.dns_match;
>  	} else if (IN6_IS_ADDR_LOOPBACK(src)			||
>  		   IN6_ARE_ADDR_EQUAL(src, &c->ip6.addr_seen)	||
>  		   IN6_ARE_ADDR_EQUAL(src, &c->ip6.addr)) {
> -		b->ip6h.daddr = c->ip6.addr_ll_seen;
> +		ip6h->daddr = c->ip6.addr_ll_seen;
>  
>  		if (IN6_IS_ADDR_LINKLOCAL(&c->ip6.gw))
> -			b->ip6h.saddr = c->ip6.gw;
> +			ip6h->saddr = c->ip6.gw;
>  		else
> -			b->ip6h.saddr = c->ip6.addr_ll;
> +			ip6h->saddr = c->ip6.addr_ll;
>  
>  		udp_tap_map[V6][src_port].ts = now->tv_sec;
>  		udp_tap_map[V6][src_port].flags |= PORT_LOCAL;
> @@ -662,20 +664,20 @@ static size_t udp_update_hdr6(const struct ctx *c, int n, in_port_t dstport,
>  
>  		bitmap_set(udp_act[V6][UDP_ACT_TAP], src_port);
>  	} else {
> -		b->ip6h.daddr = c->ip6.addr_seen;
> -		b->ip6h.saddr = b->s_in6.sin6_addr;
> +		ip6h->daddr = c->ip6.addr_seen;
> +		ip6h->saddr = s_in6->sin6_addr;
>  	}
>  
> -	b->uh.source = b->s_in6.sin6_port;
> -	b->uh.dest = htons(dstport);
> -	b->uh.len = b->ip6h.payload_len;
> -	b->uh.check = csum(&b->uh, ntohs(b->ip6h.payload_len),
> -			 proto_ipv6_header_checksum(&b->ip6h, IPPROTO_UDP));
> -	b->ip6h.version = 6;
> -	b->ip6h.nexthdr = IPPROTO_UDP;
> -	b->ip6h.hop_limit = 255;
> +	uh->source = s_in6->sin6_port;
> +	uh->dest = htons(dstport);
> +	uh->len = ip6h->payload_len;
> +	uh->check = csum(uh, ntohs(ip6h->payload_len),
> +			 proto_ipv6_header_checksum(ip6h, IPPROTO_UDP));
> +	ip6h->version = 6;
> +	ip6h->nexthdr = IPPROTO_UDP;
> +	ip6h->hop_limit = 255;
>  
> -	return tap_iov_len(c, &b->taph, ip_len);
> +	return ip_len;
>  }
>  
>  /**
> @@ -689,6 +691,11 @@ static size_t udp_update_hdr6(const struct ctx *c, int n, in_port_t dstport,
>   *
>   * Return: size of tap frame with headers
>   */
> +#pragma GCC diagnostic push
> +/* ignore unaligned pointer value warning for &udp6_l2_buf[i].ip6h and 
> + * &udp4_l2_buf[i].iph

...this is the reason why I originally wrote these functions this way:
for AVX2 builds, we align ip6h because we need to calculate the
checksum (the UDP checksum for IPv6 needs a version of the IPv6 header
as pseudo-header).

But for non-AVX2 builds, and for IPv4, we don't need any alignment
other than 4-bytes alignment of the start (s_in / s_in6). If you pass
iph or ip6h as arguments and dereference them, then they need to be
aligned (4-bytes) to be safe on all the architectures we might
reasonably be running on.

Passing &udp6_l2_buf[n] and dereferencing only that should work (gcc
checks are rather reliable in my experience, you don't have to go and
test this on all the possible architectures). Would that work for you?

Otherwise, we can pad as we do for AVX2 builds, but we'll waste bytes.
In this sense, the existing version is not ideal either:

$ CFLAGS="-g" make
$ pahole passt | less

[...]

struct udp4_l2_buf_t {
        struct sockaddr_in         s_in;                 /*     0    16 */
        struct tap_hdr             taph;                 /*    16    18 */
        struct iphdr               iph;                  /*    34    20 */
        struct udphdr              uh;                   /*    54     8 */
        uint8_t                    data[65507];          /*    62 65507 */

        /* size: 65572, cachelines: 1025, members: 5 */
        /* padding: 3 */
        /* last cacheline: 36 bytes */
} __attribute__((__aligned__(4)));

while in general we try to keep those structures below or at exactly
64 KiB. If we make 'data' smaller, though, we might truncate messages.

But at the same time, we need to keep struct sockaddr_in (or bigger) and
struct tap_hdr in here, plus padding for AVX2 builds.

Given that ~64 KiB messages are not common in practice, I wonder if we
could keep parallel arrays with sets of those few excess bytes we need,
using them as second items of iovec arrays, which will almost never be
used.

Anyway, this is beyond the scope of this patch. About this change
itself, I guess passing (and dereferencing) the head of the buffer, or
aligning iph / ipv6h should be enough.

> + */
> +#pragma GCC diagnostic ignored "-Waddress-of-packed-member"
>  static void udp_tap_send(const struct ctx *c,
>  			 unsigned int start, unsigned int n,
>  			 in_port_t dstport, bool v6, const struct timespec *now)
> @@ -702,18 +709,31 @@ static void udp_tap_send(const struct ctx *c,
>  		tap_iov = udp4_l2_iov_tap;
>  
>  	for (i = start; i < start + n; i++) {
> -		size_t buf_len;
> -
> -		if (v6)
> -			buf_len = udp_update_hdr6(c, i, dstport, now);
> -		else
> -			buf_len = udp_update_hdr4(c, i, dstport, now);
> -
> -		tap_iov[i].iov_len = buf_len;
> +		size_t ip_len;
> +
> +		if (v6) {
> +			ip_len = udp_update_hdr6(c, &udp6_l2_buf[i].ip6h,
> +						 udp6_l2_mh_sock[i].msg_len,
> +						 &udp6_l2_buf[i].s_in6, dstport,
> +						 now);
> +			tap_iov[i].iov_len = tap_iov_len(c,
> +							 &udp6_l2_buf[i].taph,
> +							 ip_len);
> +		} else {
> +			ip_len = udp_update_hdr4(c, &udp4_l2_buf[i].iph,
> +						 udp4_l2_mh_sock[i].msg_len,
> +						 &udp4_l2_buf[i].s_in,
> +						 dstport, now);
> +
> +			tap_iov[i].iov_len = tap_iov_len(c,
> +							 &udp4_l2_buf[i].taph,
> +							 ip_len);
> +		}
>  	}
>  
>  	tap_send_frames(c, tap_iov + start, n);
>  }
> +#pragma GCC diagnostic pop
>  
>  /**
>   * udp_sock_handler() - Handle new data from socket

-- 
Stefano


  parent reply	other threads:[~2024-02-11 23:16 UTC|newest]

Thread overview: 83+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-02-02 14:11 [PATCH 00/24] Add vhost-user support to passt Laurent Vivier
2024-02-02 14:11 ` [PATCH 01/24] iov: add some functions to manage iovec Laurent Vivier
2024-02-05  5:57   ` David Gibson
2024-02-06 14:28     ` Laurent Vivier
2024-02-07  1:01       ` David Gibson
2024-02-07 10:00         ` Laurent Vivier
2024-02-06 16:10   ` Stefano Brivio
2024-02-07 14:02     ` Laurent Vivier
2024-02-07 14:57       ` Stefano Brivio
2024-02-02 14:11 ` [PATCH 02/24] pcap: add pcap_iov() Laurent Vivier
2024-02-05  6:25   ` David Gibson
2024-02-06 16:10   ` Stefano Brivio
2024-02-02 14:11 ` [PATCH 03/24] checksum: align buffers Laurent Vivier
2024-02-05  6:02   ` David Gibson
2024-02-07  9:01     ` Stefano Brivio
2024-02-02 14:11 ` [PATCH 04/24] checksum: add csum_iov() Laurent Vivier
2024-02-05  6:07   ` David Gibson
2024-02-07  9:02   ` Stefano Brivio
2024-02-02 14:11 ` [PATCH 05/24] util: move IP stuff from util.[ch] to ip.[ch] Laurent Vivier
2024-02-05  6:13   ` David Gibson
2024-02-07  9:03     ` Stefano Brivio
2024-02-08  0:04       ` David Gibson
2024-02-02 14:11 ` [PATCH 06/24] ip: move duplicate IPv4 checksum function to ip.h Laurent Vivier
2024-02-05  6:16   ` David Gibson
2024-02-07 10:40   ` Stefano Brivio
2024-02-07 23:43     ` David Gibson
2024-02-02 14:11 ` [PATCH 07/24] ip: introduce functions to compute the header part checksum for TCP/UDP Laurent Vivier
2024-02-05  6:20   ` David Gibson
2024-02-07 10:41   ` Stefano Brivio
2024-02-02 14:11 ` [PATCH 08/24] tcp: extract buffer management from tcp_send_flag() Laurent Vivier
2024-02-06  0:24   ` David Gibson
2024-02-08 16:57   ` Stefano Brivio
2024-02-02 14:11 ` [PATCH 09/24] tcp: extract buffer management from tcp_conn_tap_mss() Laurent Vivier
2024-02-06  0:47   ` David Gibson
2024-02-08 16:59   ` Stefano Brivio
2024-02-02 14:11 ` [PATCH 10/24] tcp: rename functions that manage buffers Laurent Vivier
2024-02-06  1:48   ` David Gibson
2024-02-08 17:10     ` Stefano Brivio
2024-02-02 14:11 ` [PATCH 11/24] tcp: move buffers management functions to their own file Laurent Vivier
2024-02-02 14:11 ` [PATCH 12/24] tap: make tap_update_mac() generic Laurent Vivier
2024-02-06  1:49   ` David Gibson
2024-02-08 17:10     ` Stefano Brivio
2024-02-09  5:02       ` David Gibson
2024-02-02 14:11 ` [PATCH 13/24] tap: export pool_flush()/tapX_handler()/packet_add() Laurent Vivier
2024-02-02 14:29   ` Laurent Vivier
2024-02-06  1:52   ` David Gibson
2024-02-11 23:15   ` Stefano Brivio
2024-02-12  2:22     ` David Gibson
2024-02-02 14:11 ` [PATCH 14/24] udp: move udpX_l2_buf_t and udpX_l2_mh_sock out of udp_update_hdrX() Laurent Vivier
2024-02-06  1:59   ` David Gibson
2024-02-11 23:16   ` Stefano Brivio [this message]
2024-02-02 14:11 ` [PATCH 15/24] udp: rename udp_sock_handler() to udp_buf_sock_handler() Laurent Vivier
2024-02-06  2:14   ` David Gibson
2024-02-11 23:17     ` Stefano Brivio
2024-02-02 14:11 ` [PATCH 16/24] packet: replace struct desc by struct iovec Laurent Vivier
2024-02-06  2:25   ` David Gibson
2024-02-11 23:18     ` Stefano Brivio
2024-02-02 14:11 ` [PATCH 17/24] vhost-user: compare mode MODE_PASTA and not MODE_PASST Laurent Vivier
2024-02-06  2:29   ` David Gibson
2024-02-02 14:11 ` [PATCH 18/24] vhost-user: introduce virtio API Laurent Vivier
2024-02-06  3:51   ` David Gibson
2024-02-11 23:18     ` Stefano Brivio
2024-02-12  2:26       ` David Gibson
2024-02-02 14:11 ` [PATCH 19/24] vhost-user: introduce vhost-user API Laurent Vivier
2024-02-07  2:13   ` David Gibson
2024-02-02 14:11 ` [PATCH 20/24] vhost-user: add vhost-user Laurent Vivier
2024-02-07  2:40   ` David Gibson
2024-02-11 23:19     ` Stefano Brivio
2024-02-12  2:47       ` David Gibson
2024-02-13 15:22         ` Stefano Brivio
2024-02-14  2:05           ` David Gibson
2024-02-11 23:19   ` Stefano Brivio
2024-02-12  2:49     ` David Gibson
2024-02-12 10:02       ` Laurent Vivier
2024-02-12 16:56         ` Stefano Brivio
2024-02-02 14:11 ` [PATCH 21/24] vhost-user: use guest buffer directly in vu_handle_tx() Laurent Vivier
2024-02-09  4:26   ` David Gibson
2024-02-02 14:11 ` [PATCH 22/24] tcp: vhost-user RX nocopy Laurent Vivier
2024-02-09  4:57   ` David Gibson
2024-02-02 14:11 ` [PATCH 23/24] udp: " Laurent Vivier
2024-02-09  5:00   ` David Gibson
2024-02-02 14:11 ` [PATCH 24/24] vhost-user: remove tap_send_frames_vu() Laurent Vivier
2024-02-09  5:01   ` David Gibson

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20240212001606.0ddf5a36@elisabeth \
    --to=sbrivio@redhat.com \
    --cc=lvivier@redhat.com \
    --cc=passt-dev@passt.top \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
Code repositories for project(s) associated with this public inbox

	https://passt.top/passt

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for IMAP folder(s).