From: David Gibson <david@gibson.dropbear.id.au>
To: Laurent Vivier <lvivier@redhat.com>
Cc: passt-dev@passt.top
Subject: Re: [PATCH 08/24] tcp: extract buffer management from tcp_send_flag()
Date: Tue, 6 Feb 2024 11:24:15 +1100 [thread overview]
Message-ID: <ZcF8Lxy7a5q8JqEM@zatzit> (raw)
In-Reply-To: <20240202141151.3762941-9-lvivier@redhat.com>
[-- Attachment #1: Type: text/plain, Size: 13163 bytes --]
On Fri, Feb 02, 2024 at 03:11:35PM +0100, Laurent Vivier wrote:
This definitely needs a commit message to explain what you're trying
to achieve here. Without further context "buffer management" suggests
to me allocation / freeing / placement of buffers, whereas what's
actually being moved here is code related to the construction of
headers within the buffers.
> Signed-off-by: Laurent Vivier <lvivier@redhat.com>
> ---
> tcp.c | 224 +++++++++++++++++++++++++++++++++-------------------------
> 1 file changed, 129 insertions(+), 95 deletions(-)
>
> diff --git a/tcp.c b/tcp.c
> index 2fd6bc2eda53..20ad8a4e5271 100644
> --- a/tcp.c
> +++ b/tcp.c
> @@ -1320,87 +1320,98 @@ void tcp_defer_handler(struct ctx *c)
> tcp_l2_data_buf_flush(c);
> }
>
Function comment.
> +static void tcp_set_tcp_header(struct tcphdr *th,
> + const struct tcp_tap_conn *conn, uint32_t seq)
> +{
> + th->source = htons(conn->fport);
> + th->dest = htons(conn->eport);
> + th->seq = htonl(seq);
> + th->ack_seq = htonl(conn->seq_ack_to_tap);
> + if (conn->events & ESTABLISHED) {
> + th->window = htons(conn->wnd_to_tap);
> + } else {
> + unsigned wnd = conn->wnd_to_tap << conn->ws_to_tap;
> +
> + th->window = htons(MIN(wnd, USHRT_MAX));
> + }
> +}
> +
> /**
> - * tcp_l2_buf_fill_headers() - Fill 802.3, IP, TCP headers in pre-cooked buffers
> + * ipv4_fill_headers() - Fill 802.3, IPv4, TCP headers in pre-cooked buffers
> * @c: Execution context
> * @conn: Connection pointer
> - * @p: Pointer to any type of TCP pre-cooked buffer
> + * @iph: Pointer to IPv4 header, immediately followed by a TCP header
> * @plen: Payload length (including TCP header options)
> * @check: Checksum, if already known
> * @seq: Sequence number for this segment
> *
> - * Return: frame length including L2 headers, host order
> + * Return: IP frame length including L2 headers, host order
This is an odd case where adding an extra descriptor is making it less
clear to me. Without context, "frame" suggests to me an entire L2
frame containing whatever inner protocol. But I'm not immediately
sure what "IP frame" means: is it an L2 frame which happens to have IP
inside, or does it mean just the IP packet without the L2 headers.
The rest of the sentence clarifies it's the first, but it still throws
me for a moment.
> */
> -static size_t tcp_l2_buf_fill_headers(const struct ctx *c,
> - const struct tcp_tap_conn *conn,
> - void *p, size_t plen,
> - const uint16_t *check, uint32_t seq)
> +
> +static size_t ipv4_fill_headers(const struct ctx *c,
> + const struct tcp_tap_conn *conn,
> + struct iphdr *iph, size_t plen,
> + const uint16_t *check, uint32_t seq)
I really like this re-organization of the header filling code. I
wonder if separating it into a separate patch might make the remainder
patch easier to follow.
> {
> + struct tcphdr *th = (void *)(iph + 1);
> const struct in_addr *a4 = inany_v4(&conn->faddr);
> - size_t ip_len, tlen;
> -
> -#define SET_TCP_HEADER_COMMON_V4_V6(b, conn, seq) \
> -do { \
> - b->th.source = htons(conn->fport); \
> - b->th.dest = htons(conn->eport); \
> - b->th.seq = htonl(seq); \
> - b->th.ack_seq = htonl(conn->seq_ack_to_tap); \
> - if (conn->events & ESTABLISHED) { \
> - b->th.window = htons(conn->wnd_to_tap); \
> - } else { \
> - unsigned wnd = conn->wnd_to_tap << conn->ws_to_tap; \
> - \
> - b->th.window = htons(MIN(wnd, USHRT_MAX)); \
> - } \
> -} while (0)
> -
> - if (a4) {
> - struct tcp4_l2_buf_t *b = (struct tcp4_l2_buf_t *)p;
> -
> - ip_len = plen + sizeof(struct iphdr) + sizeof(struct tcphdr);
> - b->iph.tot_len = htons(ip_len);
> - b->iph.saddr = a4->s_addr;
> - b->iph.daddr = c->ip4.addr_seen.s_addr;
> -
> - b->iph.check = check ? *check :
> - ipv4_hdr_checksum(&b->iph, IPPROTO_TCP);
> -
> - SET_TCP_HEADER_COMMON_V4_V6(b, conn, seq);
> -
> - b->th.check = tcp_update_check_tcp4(&b->iph);
> -
> - tlen = tap_iov_len(c, &b->taph, ip_len);
> - } else {
> - struct tcp6_l2_buf_t *b = (struct tcp6_l2_buf_t *)p;
> + size_t ip_len = plen + sizeof(struct iphdr) + sizeof(struct tcphdr);
>
> - ip_len = plen + sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
> + iph->tot_len = htons(ip_len);
> + iph->saddr = a4->s_addr;
> + iph->daddr = c->ip4.addr_seen.s_addr;
>
> - b->ip6h.payload_len = htons(plen + sizeof(struct tcphdr));
> - b->ip6h.saddr = conn->faddr.a6;
> - if (IN6_IS_ADDR_LINKLOCAL(&b->ip6h.saddr))
> - b->ip6h.daddr = c->ip6.addr_ll_seen;
> - else
> - b->ip6h.daddr = c->ip6.addr_seen;
> + iph->check = check ? *check : ipv4_hdr_checksum(iph, IPPROTO_TCP);
> +
> + tcp_set_tcp_header(th, conn, seq);
> +
> + th->check = tcp_update_check_tcp4(iph);
> +
> + return ip_len;
> +}
> +
> +/**
> + * ipv6_fill_headers() - Fill 802.3, IPv6, TCP headers in pre-cooked buffers
> + * @c: Execution context
> + * @conn: Connection pointer
> + * @ip6h: Pointer to IPv6 header, immediately followed by a TCP header
> + * @plen: Payload length (including TCP header options)
> + * @check: Checksum, if already known
> + * @seq: Sequence number for this segment
> + *
> + * Return: IP frame length including L2 headers, host order
> + */
> +
> +static size_t ipv6_fill_headers(const struct ctx *c,
> + const struct tcp_tap_conn *conn,
> + struct ipv6hdr *ip6h, size_t plen,
> + uint32_t seq)
> +{
> + struct tcphdr *th = (void *)(ip6h + 1);
> + size_t ip_len = plen + sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
>
> - memset(b->ip6h.flow_lbl, 0, 3);
> + ip6h->payload_len = htons(plen + sizeof(struct tcphdr));
> + ip6h->saddr = conn->faddr.a6;
> + if (IN6_IS_ADDR_LINKLOCAL(&ip6h->saddr))
> + ip6h->daddr = c->ip6.addr_ll_seen;
> + else
> + ip6h->daddr = c->ip6.addr_seen;
>
> - SET_TCP_HEADER_COMMON_V4_V6(b, conn, seq);
> + memset(ip6h->flow_lbl, 0, 3);
>
> - b->th.check = tcp_update_check_tcp6(&b->ip6h);
> + tcp_set_tcp_header(th, conn, seq);
>
> - b->ip6h.hop_limit = 255;
> - b->ip6h.version = 6;
> - b->ip6h.nexthdr = IPPROTO_TCP;
> + th->check = tcp_update_check_tcp6(ip6h);
>
> - b->ip6h.flow_lbl[0] = (conn->sock >> 16) & 0xf;
> - b->ip6h.flow_lbl[1] = (conn->sock >> 8) & 0xff;
> - b->ip6h.flow_lbl[2] = (conn->sock >> 0) & 0xff;
> + ip6h->hop_limit = 255;
> + ip6h->version = 6;
> + ip6h->nexthdr = IPPROTO_TCP;
>
> - tlen = tap_iov_len(c, &b->taph, ip_len);
> - }
> -#undef SET_TCP_HEADER_COMMON_V4_V6
> + ip6h->flow_lbl[0] = (conn->sock >> 16) & 0xf;
> + ip6h->flow_lbl[1] = (conn->sock >> 8) & 0xff;
> + ip6h->flow_lbl[2] = (conn->sock >> 0) & 0xff;
>
> - return tlen;
> + return ip_len;
> }
>
> /**
> @@ -1520,27 +1531,21 @@ static void tcp_update_seqack_from_tap(const struct ctx *c,
> }
>
> /**
> - * tcp_send_flag() - Send segment with flags to tap (no payload)
> + * do_tcp_send_flag() - Send segment with flags to tap (no payload)
As a rule, I don't love "do_X" as a function name. I particularly
dislike it here because AFAICT this function isn't actually the one
that "does" the sending of the flag - that happens with the
tcp_l2_flags_buf_flush() in the caller.
> * @c: Execution context
> * @conn: Connection pointer
> * @flags: TCP flags: if not set, send segment only if ACK is due
> *
> * Return: negative error code on connection reset, 0 otherwise
> */
> -static int tcp_send_flag(struct ctx *c, struct tcp_tap_conn *conn, int flags)
> +
> +static int do_tcp_send_flag(struct ctx *c, struct tcp_tap_conn *conn, int flags, struct tcphdr *th, char *data, size_t optlen)
> {
> uint32_t prev_ack_to_tap = conn->seq_ack_to_tap;
> uint32_t prev_wnd_to_tap = conn->wnd_to_tap;
> - struct tcp4_l2_flags_buf_t *b4 = NULL;
> - struct tcp6_l2_flags_buf_t *b6 = NULL;
> struct tcp_info tinfo = { 0 };
> socklen_t sl = sizeof(tinfo);
> int s = conn->sock;
> - size_t optlen = 0;
> - struct iovec *iov;
> - struct tcphdr *th;
> - char *data;
> - void *p;
>
> if (SEQ_GE(conn->seq_ack_to_tap, conn->seq_from_tap) &&
> !flags && conn->wnd_to_tap)
> @@ -1562,26 +1567,9 @@ static int tcp_send_flag(struct ctx *c, struct tcp_tap_conn *conn, int flags)
> if (!tcp_update_seqack_wnd(c, conn, flags, &tinfo) && !flags)
> return 0;
>
> - if (CONN_V4(conn)) {
> - iov = tcp4_l2_flags_iov + tcp4_l2_flags_buf_used;
> - p = b4 = tcp4_l2_flags_buf + tcp4_l2_flags_buf_used++;
> - th = &b4->th;
> -
> - /* gcc 11.2 would complain on data = (char *)(th + 1); */
> - data = b4->opts;
> - } else {
> - iov = tcp6_l2_flags_iov + tcp6_l2_flags_buf_used;
> - p = b6 = tcp6_l2_flags_buf + tcp6_l2_flags_buf_used++;
> - th = &b6->th;
> - data = b6->opts;
> - }
> -
> if (flags & SYN) {
> int mss;
>
> - /* Options: MSS, NOP and window scale (8 bytes) */
> - optlen = OPT_MSS_LEN + 1 + OPT_WS_LEN;
> -
> *data++ = OPT_MSS;
> *data++ = OPT_MSS_LEN;
>
> @@ -1624,9 +1612,6 @@ static int tcp_send_flag(struct ctx *c, struct tcp_tap_conn *conn, int flags)
> th->syn = !!(flags & SYN);
> th->fin = !!(flags & FIN);
>
> - iov->iov_len = tcp_l2_buf_fill_headers(c, conn, p, optlen,
> - NULL, conn->seq_to_tap);
> -
> if (th->ack) {
> if (SEQ_GE(conn->seq_ack_to_tap, conn->seq_from_tap))
> conn_flag(c, conn, ~ACK_TO_TAP_DUE);
> @@ -1641,8 +1626,38 @@ static int tcp_send_flag(struct ctx *c, struct tcp_tap_conn *conn, int flags)
> if (th->fin || th->syn)
> conn->seq_to_tap++;
>
> + return 1;
> +}
> +
> +static int tcp_send_flag(struct ctx *c, struct tcp_tap_conn *conn, int flags)
> +{
> + size_t optlen = 0;
> + struct iovec *iov;
> + size_t ip_len;
> + int ret;
> +
> + /* Options: MSS, NOP and window scale (8 bytes) */
> + if (flags & SYN)
> + optlen = OPT_MSS_LEN + 1 + OPT_WS_LEN;
> +
> if (CONN_V4(conn)) {
> + struct tcp4_l2_flags_buf_t *b4;
> +
> + iov = tcp4_l2_flags_iov + tcp4_l2_flags_buf_used;
> + b4 = tcp4_l2_flags_buf + tcp4_l2_flags_buf_used++;
> +
> + ret = do_tcp_send_flag(c, conn, flags, &b4->th, b4->opts,
> + optlen);
> + if (ret <= 0)
> + return ret;
> +
> + ip_len = ipv4_fill_headers(c, conn, &b4->iph, optlen,
> + NULL, conn->seq_to_tap);
> +
> + iov->iov_len = tap_iov_len(c, &b4->taph, ip_len);
> +
> if (flags & DUP_ACK) {
> +
> memcpy(b4 + 1, b4, sizeof(*b4));
> (iov + 1)->iov_len = iov->iov_len;
> tcp4_l2_flags_buf_used++;
> @@ -1651,6 +1666,21 @@ static int tcp_send_flag(struct ctx *c, struct tcp_tap_conn *conn, int flags)
> if (tcp4_l2_flags_buf_used > ARRAY_SIZE(tcp4_l2_flags_buf) - 2)
> tcp_l2_flags_buf_flush(c);
> } else {
> + struct tcp6_l2_flags_buf_t *b6;
> +
> + iov = tcp6_l2_flags_iov + tcp6_l2_flags_buf_used;
> + b6 = tcp6_l2_flags_buf + tcp6_l2_flags_buf_used++;
> +
> + ret = do_tcp_send_flag(c, conn, flags, &b6->th, b6->opts,
> + optlen);
> + if (ret <= 0)
> + return ret;
> +
> + ip_len = ipv6_fill_headers(c, conn, &b6->ip6h, optlen,
> + conn->seq_to_tap);
> +
> + iov->iov_len = tap_iov_len(c, &b6->taph, ip_len);
> +
> if (flags & DUP_ACK) {
> memcpy(b6 + 1, b6, sizeof(*b6));
> (iov + 1)->iov_len = iov->iov_len;
> @@ -2050,6 +2080,7 @@ static void tcp_data_to_tap(const struct ctx *c, struct tcp_tap_conn *conn,
> {
> uint32_t *seq_update = &conn->seq_to_tap;
> struct iovec *iov;
> + size_t ip_len;
>
> if (CONN_V4(conn)) {
> struct tcp4_l2_buf_t *b = &tcp4_l2_buf[tcp4_l2_buf_used];
> @@ -2058,9 +2089,11 @@ static void tcp_data_to_tap(const struct ctx *c, struct tcp_tap_conn *conn,
> tcp4_l2_buf_seq_update[tcp4_l2_buf_used].seq = seq_update;
> tcp4_l2_buf_seq_update[tcp4_l2_buf_used].len = plen;
>
> + ip_len = ipv4_fill_headers(c, conn, &b->iph, plen,
> + check, seq);
> +
> iov = tcp4_l2_iov + tcp4_l2_buf_used++;
> - iov->iov_len = tcp_l2_buf_fill_headers(c, conn, b, plen,
> - check, seq);
> + iov->iov_len = tap_iov_len(c, &b->taph, ip_len);
> if (tcp4_l2_buf_used > ARRAY_SIZE(tcp4_l2_buf) - 1)
> tcp_l2_data_buf_flush(c);
> } else if (CONN_V6(conn)) {
> @@ -2069,9 +2102,10 @@ static void tcp_data_to_tap(const struct ctx *c, struct tcp_tap_conn *conn,
> tcp6_l2_buf_seq_update[tcp6_l2_buf_used].seq = seq_update;
> tcp6_l2_buf_seq_update[tcp6_l2_buf_used].len = plen;
>
> + ip_len = ipv6_fill_headers(c, conn, &b->ip6h, plen, seq);
> +
> iov = tcp6_l2_iov + tcp6_l2_buf_used++;
> - iov->iov_len = tcp_l2_buf_fill_headers(c, conn, b, plen,
> - NULL, seq);
> + iov->iov_len = tap_iov_len(c, &b->taph, ip_len);
> if (tcp6_l2_buf_used > ARRAY_SIZE(tcp6_l2_buf) - 1)
> tcp_l2_data_buf_flush(c);
> }
--
David Gibson | I'll have my music baroque, and my code
david AT gibson.dropbear.id.au | minimalist, thank you. NOT _the_ _other_
| _way_ _around_!
http://www.ozlabs.org/~dgibson
[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]
next prev parent reply other threads:[~2024-02-06 0:33 UTC|newest]
Thread overview: 83+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-02-02 14:11 [PATCH 00/24] Add vhost-user support to passt Laurent Vivier
2024-02-02 14:11 ` [PATCH 01/24] iov: add some functions to manage iovec Laurent Vivier
2024-02-05 5:57 ` David Gibson
2024-02-06 14:28 ` Laurent Vivier
2024-02-07 1:01 ` David Gibson
2024-02-07 10:00 ` Laurent Vivier
2024-02-06 16:10 ` Stefano Brivio
2024-02-07 14:02 ` Laurent Vivier
2024-02-07 14:57 ` Stefano Brivio
2024-02-02 14:11 ` [PATCH 02/24] pcap: add pcap_iov() Laurent Vivier
2024-02-05 6:25 ` David Gibson
2024-02-06 16:10 ` Stefano Brivio
2024-02-02 14:11 ` [PATCH 03/24] checksum: align buffers Laurent Vivier
2024-02-05 6:02 ` David Gibson
2024-02-07 9:01 ` Stefano Brivio
2024-02-02 14:11 ` [PATCH 04/24] checksum: add csum_iov() Laurent Vivier
2024-02-05 6:07 ` David Gibson
2024-02-07 9:02 ` Stefano Brivio
2024-02-02 14:11 ` [PATCH 05/24] util: move IP stuff from util.[ch] to ip.[ch] Laurent Vivier
2024-02-05 6:13 ` David Gibson
2024-02-07 9:03 ` Stefano Brivio
2024-02-08 0:04 ` David Gibson
2024-02-02 14:11 ` [PATCH 06/24] ip: move duplicate IPv4 checksum function to ip.h Laurent Vivier
2024-02-05 6:16 ` David Gibson
2024-02-07 10:40 ` Stefano Brivio
2024-02-07 23:43 ` David Gibson
2024-02-02 14:11 ` [PATCH 07/24] ip: introduce functions to compute the header part checksum for TCP/UDP Laurent Vivier
2024-02-05 6:20 ` David Gibson
2024-02-07 10:41 ` Stefano Brivio
2024-02-02 14:11 ` [PATCH 08/24] tcp: extract buffer management from tcp_send_flag() Laurent Vivier
2024-02-06 0:24 ` David Gibson [this message]
2024-02-08 16:57 ` Stefano Brivio
2024-02-02 14:11 ` [PATCH 09/24] tcp: extract buffer management from tcp_conn_tap_mss() Laurent Vivier
2024-02-06 0:47 ` David Gibson
2024-02-08 16:59 ` Stefano Brivio
2024-02-02 14:11 ` [PATCH 10/24] tcp: rename functions that manage buffers Laurent Vivier
2024-02-06 1:48 ` David Gibson
2024-02-08 17:10 ` Stefano Brivio
2024-02-02 14:11 ` [PATCH 11/24] tcp: move buffers management functions to their own file Laurent Vivier
2024-02-02 14:11 ` [PATCH 12/24] tap: make tap_update_mac() generic Laurent Vivier
2024-02-06 1:49 ` David Gibson
2024-02-08 17:10 ` Stefano Brivio
2024-02-09 5:02 ` David Gibson
2024-02-02 14:11 ` [PATCH 13/24] tap: export pool_flush()/tapX_handler()/packet_add() Laurent Vivier
2024-02-02 14:29 ` Laurent Vivier
2024-02-06 1:52 ` David Gibson
2024-02-11 23:15 ` Stefano Brivio
2024-02-12 2:22 ` David Gibson
2024-02-02 14:11 ` [PATCH 14/24] udp: move udpX_l2_buf_t and udpX_l2_mh_sock out of udp_update_hdrX() Laurent Vivier
2024-02-06 1:59 ` David Gibson
2024-02-11 23:16 ` Stefano Brivio
2024-02-02 14:11 ` [PATCH 15/24] udp: rename udp_sock_handler() to udp_buf_sock_handler() Laurent Vivier
2024-02-06 2:14 ` David Gibson
2024-02-11 23:17 ` Stefano Brivio
2024-02-02 14:11 ` [PATCH 16/24] packet: replace struct desc by struct iovec Laurent Vivier
2024-02-06 2:25 ` David Gibson
2024-02-11 23:18 ` Stefano Brivio
2024-02-02 14:11 ` [PATCH 17/24] vhost-user: compare mode MODE_PASTA and not MODE_PASST Laurent Vivier
2024-02-06 2:29 ` David Gibson
2024-02-02 14:11 ` [PATCH 18/24] vhost-user: introduce virtio API Laurent Vivier
2024-02-06 3:51 ` David Gibson
2024-02-11 23:18 ` Stefano Brivio
2024-02-12 2:26 ` David Gibson
2024-02-02 14:11 ` [PATCH 19/24] vhost-user: introduce vhost-user API Laurent Vivier
2024-02-07 2:13 ` David Gibson
2024-02-02 14:11 ` [PATCH 20/24] vhost-user: add vhost-user Laurent Vivier
2024-02-07 2:40 ` David Gibson
2024-02-11 23:19 ` Stefano Brivio
2024-02-12 2:47 ` David Gibson
2024-02-13 15:22 ` Stefano Brivio
2024-02-14 2:05 ` David Gibson
2024-02-11 23:19 ` Stefano Brivio
2024-02-12 2:49 ` David Gibson
2024-02-12 10:02 ` Laurent Vivier
2024-02-12 16:56 ` Stefano Brivio
2024-02-02 14:11 ` [PATCH 21/24] vhost-user: use guest buffer directly in vu_handle_tx() Laurent Vivier
2024-02-09 4:26 ` David Gibson
2024-02-02 14:11 ` [PATCH 22/24] tcp: vhost-user RX nocopy Laurent Vivier
2024-02-09 4:57 ` David Gibson
2024-02-02 14:11 ` [PATCH 23/24] udp: " Laurent Vivier
2024-02-09 5:00 ` David Gibson
2024-02-02 14:11 ` [PATCH 24/24] vhost-user: remove tap_send_frames_vu() Laurent Vivier
2024-02-09 5:01 ` David Gibson
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=ZcF8Lxy7a5q8JqEM@zatzit \
--to=david@gibson.dropbear.id.au \
--cc=lvivier@redhat.com \
--cc=passt-dev@passt.top \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
Code repositories for project(s) associated with this public inbox
https://passt.top/passt
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for IMAP folder(s).