From: Stefano Brivio <sbrivio@redhat.com>
To: Laurent Vivier <lvivier@redhat.com>
Cc: passt-dev@passt.top
Subject: Re: [PATCH 08/24] tcp: extract buffer management from tcp_send_flag()
Date: Thu, 8 Feb 2024 17:57:27 +0100 [thread overview]
Message-ID: <20240208175727.52d68b84@elisabeth> (raw)
In-Reply-To: <20240202141151.3762941-9-lvivier@redhat.com>
On Fri, 2 Feb 2024 15:11:35 +0100
Laurent Vivier <lvivier@redhat.com> wrote:
> Signed-off-by: Laurent Vivier <lvivier@redhat.com>
> ---
> tcp.c | 224 +++++++++++++++++++++++++++++++++-------------------------
> 1 file changed, 129 insertions(+), 95 deletions(-)
>
> diff --git a/tcp.c b/tcp.c
> index 2fd6bc2eda53..20ad8a4e5271 100644
> --- a/tcp.c
> +++ b/tcp.c
> @@ -1320,87 +1320,98 @@ void tcp_defer_handler(struct ctx *c)
> tcp_l2_data_buf_flush(c);
> }
>
> +static void tcp_set_tcp_header(struct tcphdr *th,
> + const struct tcp_tap_conn *conn, uint32_t seq)
Ah, nice, thanks, it adds a few lines but it's much better than that
macro soup.
I think the names of this function and the following ones, though, are
now a bit less consistent: filling and setting are pretty much the same
thing, and ipv4_fill_headers() only works for TCP packets. What about
tcp_fill_header(), tcp_fill_ipv4_header(), and tcp_fill_ipv6_header()?
Or _set_ for everything.
> +{
> + th->source = htons(conn->fport);
> + th->dest = htons(conn->eport);
> + th->seq = htonl(seq);
> + th->ack_seq = htonl(conn->seq_ack_to_tap);
> + if (conn->events & ESTABLISHED) {
> + th->window = htons(conn->wnd_to_tap);
> + } else {
> + unsigned wnd = conn->wnd_to_tap << conn->ws_to_tap;
> +
> + th->window = htons(MIN(wnd, USHRT_MAX));
> + }
> +}
> +
> /**
> - * tcp_l2_buf_fill_headers() - Fill 802.3, IP, TCP headers in pre-cooked buffers
> + * ipv4_fill_headers() - Fill 802.3, IPv4, TCP headers in pre-cooked buffers
> * @c: Execution context
> * @conn: Connection pointer
> - * @p: Pointer to any type of TCP pre-cooked buffer
> + * @iph: Pointer to IPv4 header, immediately followed by a TCP header
> * @plen: Payload length (including TCP header options)
> * @check: Checksum, if already known
> * @seq: Sequence number for this segment
> *
> - * Return: frame length including L2 headers, host order
> + * Return: IP frame length including L2 headers, host order
> */
> -static size_t tcp_l2_buf_fill_headers(const struct ctx *c,
> - const struct tcp_tap_conn *conn,
> - void *p, size_t plen,
> - const uint16_t *check, uint32_t seq)
> +
> +static size_t ipv4_fill_headers(const struct ctx *c,
> + const struct tcp_tap_conn *conn,
> + struct iphdr *iph, size_t plen,
> + const uint16_t *check, uint32_t seq)
> {
> + struct tcphdr *th = (void *)(iph + 1);
We should check this against gcc 11.2, because I had a warning with the
previous attempt at this, below:
> - /* gcc 11.2 would complain on data = (char *)(th + 1); */
Besides, void * will be promoted to struct tcphdr *, but can't we just
use the right cast right away? That is,
struct tcphdr *th = (struct tcphdr *)(iph + 1);
Either way, this should go after the next line (declarations from
longest to shortest).
> const struct in_addr *a4 = inany_v4(&conn->faddr);
> - size_t ip_len, tlen;
> -
> -#define SET_TCP_HEADER_COMMON_V4_V6(b, conn, seq) \
> -do { \
> - b->th.source = htons(conn->fport); \
> - b->th.dest = htons(conn->eport); \
> - b->th.seq = htonl(seq); \
> - b->th.ack_seq = htonl(conn->seq_ack_to_tap); \
> - if (conn->events & ESTABLISHED) { \
> - b->th.window = htons(conn->wnd_to_tap); \
> - } else { \
> - unsigned wnd = conn->wnd_to_tap << conn->ws_to_tap; \
> - \
> - b->th.window = htons(MIN(wnd, USHRT_MAX)); \
> - } \
> -} while (0)
> -
> - if (a4) {
> - struct tcp4_l2_buf_t *b = (struct tcp4_l2_buf_t *)p;
> -
> - ip_len = plen + sizeof(struct iphdr) + sizeof(struct tcphdr);
> - b->iph.tot_len = htons(ip_len);
> - b->iph.saddr = a4->s_addr;
> - b->iph.daddr = c->ip4.addr_seen.s_addr;
> -
> - b->iph.check = check ? *check :
> - ipv4_hdr_checksum(&b->iph, IPPROTO_TCP);
> -
> - SET_TCP_HEADER_COMMON_V4_V6(b, conn, seq);
> -
> - b->th.check = tcp_update_check_tcp4(&b->iph);
> -
> - tlen = tap_iov_len(c, &b->taph, ip_len);
> - } else {
> - struct tcp6_l2_buf_t *b = (struct tcp6_l2_buf_t *)p;
> + size_t ip_len = plen + sizeof(struct iphdr) + sizeof(struct tcphdr);
...and this should be the first one.
>
> - ip_len = plen + sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
> + iph->tot_len = htons(ip_len);
> + iph->saddr = a4->s_addr;
> + iph->daddr = c->ip4.addr_seen.s_addr;
>
> - b->ip6h.payload_len = htons(plen + sizeof(struct tcphdr));
> - b->ip6h.saddr = conn->faddr.a6;
> - if (IN6_IS_ADDR_LINKLOCAL(&b->ip6h.saddr))
> - b->ip6h.daddr = c->ip6.addr_ll_seen;
> - else
> - b->ip6h.daddr = c->ip6.addr_seen;
> + iph->check = check ? *check : ipv4_hdr_checksum(iph, IPPROTO_TCP);
> +
> + tcp_set_tcp_header(th, conn, seq);
> +
> + th->check = tcp_update_check_tcp4(iph);
> +
> + return ip_len;
> +}
> +
> +/**
> + * ipv6_fill_headers() - Fill 802.3, IPv6, TCP headers in pre-cooked buffers
> + * @c: Execution context
> + * @conn: Connection pointer
> + * @ip6h: Pointer to IPv6 header, immediately followed by a TCP header
> + * @plen: Payload length (including TCP header options)
> + * @check: Checksum, if already known
> + * @seq: Sequence number for this segment
> + *
> + * Return: IP frame length including L2 headers, host order
> + */
> +
> +static size_t ipv6_fill_headers(const struct ctx *c,
> + const struct tcp_tap_conn *conn,
> + struct ipv6hdr *ip6h, size_t plen,
> + uint32_t seq)
> +{
> + struct tcphdr *th = (void *)(ip6h + 1);
> + size_t ip_len = plen + sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
>
> - memset(b->ip6h.flow_lbl, 0, 3);
> + ip6h->payload_len = htons(plen + sizeof(struct tcphdr));
> + ip6h->saddr = conn->faddr.a6;
> + if (IN6_IS_ADDR_LINKLOCAL(&ip6h->saddr))
> + ip6h->daddr = c->ip6.addr_ll_seen;
> + else
> + ip6h->daddr = c->ip6.addr_seen;
>
> - SET_TCP_HEADER_COMMON_V4_V6(b, conn, seq);
> + memset(ip6h->flow_lbl, 0, 3);
>
> - b->th.check = tcp_update_check_tcp6(&b->ip6h);
> + tcp_set_tcp_header(th, conn, seq);
>
> - b->ip6h.hop_limit = 255;
> - b->ip6h.version = 6;
> - b->ip6h.nexthdr = IPPROTO_TCP;
> + th->check = tcp_update_check_tcp6(ip6h);
>
> - b->ip6h.flow_lbl[0] = (conn->sock >> 16) & 0xf;
> - b->ip6h.flow_lbl[1] = (conn->sock >> 8) & 0xff;
> - b->ip6h.flow_lbl[2] = (conn->sock >> 0) & 0xff;
> + ip6h->hop_limit = 255;
> + ip6h->version = 6;
> + ip6h->nexthdr = IPPROTO_TCP;
>
> - tlen = tap_iov_len(c, &b->taph, ip_len);
> - }
> -#undef SET_TCP_HEADER_COMMON_V4_V6
> + ip6h->flow_lbl[0] = (conn->sock >> 16) & 0xf;
> + ip6h->flow_lbl[1] = (conn->sock >> 8) & 0xff;
> + ip6h->flow_lbl[2] = (conn->sock >> 0) & 0xff;
>
> - return tlen;
> + return ip_len;
> }
>
> /**
> @@ -1520,27 +1531,21 @@ static void tcp_update_seqack_from_tap(const struct ctx *c,
> }
>
> /**
> - * tcp_send_flag() - Send segment with flags to tap (no payload)
> + * do_tcp_send_flag() - Send segment with flags to tap (no payload)
> * @c: Execution context
> * @conn: Connection pointer
> * @flags: TCP flags: if not set, send segment only if ACK is due
> *
> * Return: negative error code on connection reset, 0 otherwise
This should be adjusted -- it took me a while to realise what you mean
by 0 and 1 now.
> */
> -static int tcp_send_flag(struct ctx *c, struct tcp_tap_conn *conn, int flags)
> +
> +static int do_tcp_send_flag(struct ctx *c, struct tcp_tap_conn *conn, int flags, struct tcphdr *th, char *data, size_t optlen)
Maybe tcp_fill_flag_header() - Prepare header for flags-only segment
(no payload)?
> {
> uint32_t prev_ack_to_tap = conn->seq_ack_to_tap;
> uint32_t prev_wnd_to_tap = conn->wnd_to_tap;
> - struct tcp4_l2_flags_buf_t *b4 = NULL;
> - struct tcp6_l2_flags_buf_t *b6 = NULL;
> struct tcp_info tinfo = { 0 };
> socklen_t sl = sizeof(tinfo);
> int s = conn->sock;
> - size_t optlen = 0;
> - struct iovec *iov;
> - struct tcphdr *th;
> - char *data;
> - void *p;
>
> if (SEQ_GE(conn->seq_ack_to_tap, conn->seq_from_tap) &&
> !flags && conn->wnd_to_tap)
> @@ -1562,26 +1567,9 @@ static int tcp_send_flag(struct ctx *c, struct tcp_tap_conn *conn, int flags)
> if (!tcp_update_seqack_wnd(c, conn, flags, &tinfo) && !flags)
> return 0;
>
> - if (CONN_V4(conn)) {
> - iov = tcp4_l2_flags_iov + tcp4_l2_flags_buf_used;
> - p = b4 = tcp4_l2_flags_buf + tcp4_l2_flags_buf_used++;
> - th = &b4->th;
> -
> - /* gcc 11.2 would complain on data = (char *)(th + 1); */
> - data = b4->opts;
> - } else {
> - iov = tcp6_l2_flags_iov + tcp6_l2_flags_buf_used;
> - p = b6 = tcp6_l2_flags_buf + tcp6_l2_flags_buf_used++;
> - th = &b6->th;
> - data = b6->opts;
> - }
> -
> if (flags & SYN) {
> int mss;
>
> - /* Options: MSS, NOP and window scale (8 bytes) */
> - optlen = OPT_MSS_LEN + 1 + OPT_WS_LEN;
> -
This is very header-specific, whereas tcp_send_flag() isn't anymore.
Perhaps the new function could take a pointer to optlen and change it
instead?
> *data++ = OPT_MSS;
> *data++ = OPT_MSS_LEN;
>
> @@ -1624,9 +1612,6 @@ static int tcp_send_flag(struct ctx *c, struct tcp_tap_conn *conn, int flags)
> th->syn = !!(flags & SYN);
> th->fin = !!(flags & FIN);
>
> - iov->iov_len = tcp_l2_buf_fill_headers(c, conn, p, optlen,
> - NULL, conn->seq_to_tap);
> -
> if (th->ack) {
> if (SEQ_GE(conn->seq_ack_to_tap, conn->seq_from_tap))
> conn_flag(c, conn, ~ACK_TO_TAP_DUE);
> @@ -1641,8 +1626,38 @@ static int tcp_send_flag(struct ctx *c, struct tcp_tap_conn *conn, int flags)
> if (th->fin || th->syn)
> conn->seq_to_tap++;
>
> + return 1;
> +}
> +
> +static int tcp_send_flag(struct ctx *c, struct tcp_tap_conn *conn, int flags)
> +{
> + size_t optlen = 0;
> + struct iovec *iov;
> + size_t ip_len;
> + int ret;
> +
> + /* Options: MSS, NOP and window scale (8 bytes) */
> + if (flags & SYN)
> + optlen = OPT_MSS_LEN + 1 + OPT_WS_LEN;
> +
> if (CONN_V4(conn)) {
> + struct tcp4_l2_flags_buf_t *b4;
> +
> + iov = tcp4_l2_flags_iov + tcp4_l2_flags_buf_used;
> + b4 = tcp4_l2_flags_buf + tcp4_l2_flags_buf_used++;
> +
> + ret = do_tcp_send_flag(c, conn, flags, &b4->th, b4->opts,
> + optlen);
> + if (ret <= 0)
> + return ret;
> +
> + ip_len = ipv4_fill_headers(c, conn, &b4->iph, optlen,
> + NULL, conn->seq_to_tap);
> +
> + iov->iov_len = tap_iov_len(c, &b4->taph, ip_len);
> +
> if (flags & DUP_ACK) {
> +
> memcpy(b4 + 1, b4, sizeof(*b4));
> (iov + 1)->iov_len = iov->iov_len;
> tcp4_l2_flags_buf_used++;
> @@ -1651,6 +1666,21 @@ static int tcp_send_flag(struct ctx *c, struct tcp_tap_conn *conn, int flags)
> if (tcp4_l2_flags_buf_used > ARRAY_SIZE(tcp4_l2_flags_buf) - 2)
> tcp_l2_flags_buf_flush(c);
> } else {
> + struct tcp6_l2_flags_buf_t *b6;
> +
> + iov = tcp6_l2_flags_iov + tcp6_l2_flags_buf_used;
> + b6 = tcp6_l2_flags_buf + tcp6_l2_flags_buf_used++;
> +
> + ret = do_tcp_send_flag(c, conn, flags, &b6->th, b6->opts,
> + optlen);
> + if (ret <= 0)
> + return ret;
> +
> + ip_len = ipv6_fill_headers(c, conn, &b6->ip6h, optlen,
> + conn->seq_to_tap);
> +
> + iov->iov_len = tap_iov_len(c, &b6->taph, ip_len);
> +
> if (flags & DUP_ACK) {
> memcpy(b6 + 1, b6, sizeof(*b6));
> (iov + 1)->iov_len = iov->iov_len;
> @@ -2050,6 +2080,7 @@ static void tcp_data_to_tap(const struct ctx *c, struct tcp_tap_conn *conn,
> {
> uint32_t *seq_update = &conn->seq_to_tap;
> struct iovec *iov;
> + size_t ip_len;
>
> if (CONN_V4(conn)) {
> struct tcp4_l2_buf_t *b = &tcp4_l2_buf[tcp4_l2_buf_used];
> @@ -2058,9 +2089,11 @@ static void tcp_data_to_tap(const struct ctx *c, struct tcp_tap_conn *conn,
> tcp4_l2_buf_seq_update[tcp4_l2_buf_used].seq = seq_update;
> tcp4_l2_buf_seq_update[tcp4_l2_buf_used].len = plen;
>
> + ip_len = ipv4_fill_headers(c, conn, &b->iph, plen,
> + check, seq);
> +
> iov = tcp4_l2_iov + tcp4_l2_buf_used++;
> - iov->iov_len = tcp_l2_buf_fill_headers(c, conn, b, plen,
> - check, seq);
> + iov->iov_len = tap_iov_len(c, &b->taph, ip_len);
> if (tcp4_l2_buf_used > ARRAY_SIZE(tcp4_l2_buf) - 1)
> tcp_l2_data_buf_flush(c);
> } else if (CONN_V6(conn)) {
> @@ -2069,9 +2102,10 @@ static void tcp_data_to_tap(const struct ctx *c, struct tcp_tap_conn *conn,
> tcp6_l2_buf_seq_update[tcp6_l2_buf_used].seq = seq_update;
> tcp6_l2_buf_seq_update[tcp6_l2_buf_used].len = plen;
>
> + ip_len = ipv6_fill_headers(c, conn, &b->ip6h, plen, seq);
> +
> iov = tcp6_l2_iov + tcp6_l2_buf_used++;
> - iov->iov_len = tcp_l2_buf_fill_headers(c, conn, b, plen,
> - NULL, seq);
> + iov->iov_len = tap_iov_len(c, &b->taph, ip_len);
> if (tcp6_l2_buf_used > ARRAY_SIZE(tcp6_l2_buf) - 1)
> tcp_l2_data_buf_flush(c);
> }
--
Stefano
next prev parent reply other threads:[~2024-02-08 16:59 UTC|newest]
Thread overview: 83+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-02-02 14:11 [PATCH 00/24] Add vhost-user support to passt Laurent Vivier
2024-02-02 14:11 ` [PATCH 01/24] iov: add some functions to manage iovec Laurent Vivier
2024-02-05 5:57 ` David Gibson
2024-02-06 14:28 ` Laurent Vivier
2024-02-07 1:01 ` David Gibson
2024-02-07 10:00 ` Laurent Vivier
2024-02-06 16:10 ` Stefano Brivio
2024-02-07 14:02 ` Laurent Vivier
2024-02-07 14:57 ` Stefano Brivio
2024-02-02 14:11 ` [PATCH 02/24] pcap: add pcap_iov() Laurent Vivier
2024-02-05 6:25 ` David Gibson
2024-02-06 16:10 ` Stefano Brivio
2024-02-02 14:11 ` [PATCH 03/24] checksum: align buffers Laurent Vivier
2024-02-05 6:02 ` David Gibson
2024-02-07 9:01 ` Stefano Brivio
2024-02-02 14:11 ` [PATCH 04/24] checksum: add csum_iov() Laurent Vivier
2024-02-05 6:07 ` David Gibson
2024-02-07 9:02 ` Stefano Brivio
2024-02-02 14:11 ` [PATCH 05/24] util: move IP stuff from util.[ch] to ip.[ch] Laurent Vivier
2024-02-05 6:13 ` David Gibson
2024-02-07 9:03 ` Stefano Brivio
2024-02-08 0:04 ` David Gibson
2024-02-02 14:11 ` [PATCH 06/24] ip: move duplicate IPv4 checksum function to ip.h Laurent Vivier
2024-02-05 6:16 ` David Gibson
2024-02-07 10:40 ` Stefano Brivio
2024-02-07 23:43 ` David Gibson
2024-02-02 14:11 ` [PATCH 07/24] ip: introduce functions to compute the header part checksum for TCP/UDP Laurent Vivier
2024-02-05 6:20 ` David Gibson
2024-02-07 10:41 ` Stefano Brivio
2024-02-02 14:11 ` [PATCH 08/24] tcp: extract buffer management from tcp_send_flag() Laurent Vivier
2024-02-06 0:24 ` David Gibson
2024-02-08 16:57 ` Stefano Brivio [this message]
2024-02-02 14:11 ` [PATCH 09/24] tcp: extract buffer management from tcp_conn_tap_mss() Laurent Vivier
2024-02-06 0:47 ` David Gibson
2024-02-08 16:59 ` Stefano Brivio
2024-02-02 14:11 ` [PATCH 10/24] tcp: rename functions that manage buffers Laurent Vivier
2024-02-06 1:48 ` David Gibson
2024-02-08 17:10 ` Stefano Brivio
2024-02-02 14:11 ` [PATCH 11/24] tcp: move buffers management functions to their own file Laurent Vivier
2024-02-02 14:11 ` [PATCH 12/24] tap: make tap_update_mac() generic Laurent Vivier
2024-02-06 1:49 ` David Gibson
2024-02-08 17:10 ` Stefano Brivio
2024-02-09 5:02 ` David Gibson
2024-02-02 14:11 ` [PATCH 13/24] tap: export pool_flush()/tapX_handler()/packet_add() Laurent Vivier
2024-02-02 14:29 ` Laurent Vivier
2024-02-06 1:52 ` David Gibson
2024-02-11 23:15 ` Stefano Brivio
2024-02-12 2:22 ` David Gibson
2024-02-02 14:11 ` [PATCH 14/24] udp: move udpX_l2_buf_t and udpX_l2_mh_sock out of udp_update_hdrX() Laurent Vivier
2024-02-06 1:59 ` David Gibson
2024-02-11 23:16 ` Stefano Brivio
2024-02-02 14:11 ` [PATCH 15/24] udp: rename udp_sock_handler() to udp_buf_sock_handler() Laurent Vivier
2024-02-06 2:14 ` David Gibson
2024-02-11 23:17 ` Stefano Brivio
2024-02-02 14:11 ` [PATCH 16/24] packet: replace struct desc by struct iovec Laurent Vivier
2024-02-06 2:25 ` David Gibson
2024-02-11 23:18 ` Stefano Brivio
2024-02-02 14:11 ` [PATCH 17/24] vhost-user: compare mode MODE_PASTA and not MODE_PASST Laurent Vivier
2024-02-06 2:29 ` David Gibson
2024-02-02 14:11 ` [PATCH 18/24] vhost-user: introduce virtio API Laurent Vivier
2024-02-06 3:51 ` David Gibson
2024-02-11 23:18 ` Stefano Brivio
2024-02-12 2:26 ` David Gibson
2024-02-02 14:11 ` [PATCH 19/24] vhost-user: introduce vhost-user API Laurent Vivier
2024-02-07 2:13 ` David Gibson
2024-02-02 14:11 ` [PATCH 20/24] vhost-user: add vhost-user Laurent Vivier
2024-02-07 2:40 ` David Gibson
2024-02-11 23:19 ` Stefano Brivio
2024-02-12 2:47 ` David Gibson
2024-02-13 15:22 ` Stefano Brivio
2024-02-14 2:05 ` David Gibson
2024-02-11 23:19 ` Stefano Brivio
2024-02-12 2:49 ` David Gibson
2024-02-12 10:02 ` Laurent Vivier
2024-02-12 16:56 ` Stefano Brivio
2024-02-02 14:11 ` [PATCH 21/24] vhost-user: use guest buffer directly in vu_handle_tx() Laurent Vivier
2024-02-09 4:26 ` David Gibson
2024-02-02 14:11 ` [PATCH 22/24] tcp: vhost-user RX nocopy Laurent Vivier
2024-02-09 4:57 ` David Gibson
2024-02-02 14:11 ` [PATCH 23/24] udp: " Laurent Vivier
2024-02-09 5:00 ` David Gibson
2024-02-02 14:11 ` [PATCH 24/24] vhost-user: remove tap_send_frames_vu() Laurent Vivier
2024-02-09 5:01 ` David Gibson
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20240208175727.52d68b84@elisabeth \
--to=sbrivio@redhat.com \
--cc=lvivier@redhat.com \
--cc=passt-dev@passt.top \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
Code repositories for project(s) associated with this public inbox
https://passt.top/passt
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for IMAP folder(s).