From: Stefano Brivio <sbrivio@redhat.com>
To: Jon Maloy <jmaloy@redhat.com>
Cc: passt-dev@passt.top, lvivier@redhat.com, dgibson@redhat.com
Subject: Re: [PATCH] tcp: unify payload and flags l2 frames array
Date: Sun, 22 Sep 2024 08:45:08 +0200 [thread overview]
Message-ID: <20240922084508.04fe7206@elisabeth> (raw)
In-Reply-To: <20240921194333.3938826-1-jmaloy@redhat.com>
On Sat, 21 Sep 2024 15:43:33 -0400
Jon Maloy <jmaloy@redhat.com> wrote:
> In order to reduce memory and code footprint, we merge the array
As it is, with this one, I think we're actually wasting more memory,
because, if I understand this patch correctly, every ACK we send needs
its own 64k unused buffer.
> for l2 flag frames into the one for payload frames.
>
> This change also ensures that no flag message will be sent out
> over the l2 media bypassing already queued payload messages.
>
> Signed-off-by: Jon Maloy <jmaloy@redhat.com>
> ---
> tcp.c | 3 +--
> tcp_buf.c | 70 ++++++++++++-------------------------------------------
> 2 files changed, 16 insertions(+), 57 deletions(-)
If I apply this on top of "[PATCH v3 0/2] tcp: unify IPv4 and IPv6 tap
queues" (it doesn't apply otherwise), it doesn't build:
tcp.c:1149:5: error: conflicting types for ‘tcp_prepare_flags’; have ‘int(const struct ctx *, struct tcp_tap_conn *, int, struct tcphdr *, uint8_t *, size_t *)’ {aka ‘int(const struct ctx *, struct tcp_tap_conn *, int, struct tcphdr *, unsigned char *, long unsigned int *)’}
1149 | int tcp_prepare_flags(const struct ctx *c, struct tcp_tap_conn *conn,
| ^~~~~~~~~~~~~~~~~
In file included from tcp.c:305:
tcp_internal.h:98:5: note: previous declaration of ‘tcp_prepare_flags’ with type ‘int(const struct ctx *, struct tcp_tap_conn *, int, struct tcphdr *, char *, size_t *)’ {aka ‘int(const struct ctx *, struct tcp_tap_conn *, int, struct tcphdr *, char *, long unsigned int *)’}
98 | int tcp_prepare_flags(const struct ctx *c, struct tcp_tap_conn *conn, int flags,
| ^~~~~~~~~~~~~~~~~
tcp_buf.c: In function ‘tcp_buf_send_flag’:
tcp_buf.c:204:40: warning: pointer targets in passing argument 5 of ‘tcp_prepare_flags’ differ in signedness [-Wpointer-sign]
204 | payload->data, &optlen);
| ~~~~~~~^~~~~~
| |
| uint8_t * {aka unsigned char *}
In file included from tcp_buf.c:33:
tcp_internal.h:99:48: note: expected ‘char *’ but argument is of type ‘uint8_t *’ {aka ‘unsigned char *’}
99 | struct tcphdr *th, char *data, size_t *optlen);
| ~~~~~~^~~~
make: *** [Makefile:122: passt] Error 1
> diff --git a/tcp.c b/tcp.c
> index 12e864f..dd060d0 100644
> --- a/tcp.c
> +++ b/tcp.c
> @@ -868,7 +868,6 @@ bool tcp_flow_defer(const struct tcp_tap_conn *conn)
> /* cppcheck-suppress [constParameterPointer, unmatchedSuppression] */
> void tcp_defer_handler(struct ctx *c)
> {
> - tcp_flags_flush(c);
> tcp_payload_flush(c);
> }
>
> @@ -1148,7 +1147,7 @@ static void tcp_update_seqack_from_tap(const struct ctx *c,
> * 1 otherwise
> */
> int tcp_prepare_flags(const struct ctx *c, struct tcp_tap_conn *conn,
> - int flags, struct tcphdr *th, char *data,
> + int flags, struct tcphdr *th, uint8_t *data,
> size_t *optlen)
> {
> struct tcp_info tinfo = { 0 };
> diff --git a/tcp_buf.c b/tcp_buf.c
> index 625c29c..58636d1 100644
> --- a/tcp_buf.c
> +++ b/tcp_buf.c
> @@ -33,6 +33,7 @@
> #include "tcp_internal.h"
> #include "tcp_buf.h"
>
> +#define PAYLOAD_FLAGS htons(0x5010) /* doff = 5, ack = 1 */
> #define TCP_FRAMES_MEM 128
> #define TCP_FRAMES \
> (c->mode == MODE_PASTA ? 1 : TCP_FRAMES_MEM)
> @@ -52,21 +53,6 @@ struct tcp_payload_t {
> } __attribute__ ((packed, aligned(__alignof__(unsigned int))));
> #endif
>
> -/**
> - * struct tcp_flags_t - TCP header and data to send zero-length
> - * segments (flags)
> - * @th: TCP header
> - * @opts TCP options
> - */
> -struct tcp_flags_t {
> - struct tcphdr th;
> - char opts[OPT_MSS_LEN + OPT_WS_LEN + 1];
> -#ifdef __AVX2__
> -} __attribute__ ((packed, aligned(32)));
> -#else
> -} __attribute__ ((packed, aligned(__alignof__(unsigned int))));
> -#endif
> -
> /* Ethernet header for IPv4 and IPv6 frames */
> static struct ethhdr tcp4_eth_src;
> static struct ethhdr tcp6_eth_src;
> @@ -87,22 +73,10 @@ static_assert(MSS6 <= sizeof(tcp_payload[0].data), "MSS6 is greater than 65516")
> static struct tcp_tap_conn *tcp_frame_conns[TCP_FRAMES_MEM];
> static unsigned int tcp_payload_used;
>
> -static struct tap_hdr tcp_flags_tap_hdr[TCP_FRAMES_MEM];
> -/* IPv4 headers for TCP segment without payload */
> -static struct iphdr tcp4_flags_ip[TCP_FRAMES_MEM];
> -/* TCP segments without payload for IPv4 frames */
> -static struct tcp_flags_t tcp_flags[TCP_FRAMES_MEM];
> -
> -static unsigned int tcp_flags_used;
> -
> -/* IPv6 headers for TCP segment without payload */
> -static struct ipv6hdr tcp6_flags_ip[TCP_FRAMES_MEM];
> -
> /* recvmsg()/sendmsg() data for tap */
> static struct iovec iov_sock [TCP_FRAMES_MEM + 1];
>
> static struct iovec tcp_l2_iov[TCP_FRAMES_MEM][TCP_NUM_IOVS];
> -static struct iovec tcp_l2_flags_iov[TCP_FRAMES_MEM][TCP_NUM_IOVS];
>
> /**
> * tcp_update_l2_buf() - Update Ethernet header buffers with addresses
> @@ -135,13 +109,6 @@ void tcp_sock_iov_init(const struct ctx *c)
> tcp_payload[i].th.ack = 1;
> }
>
> - for (i = 0; i < ARRAY_SIZE(tcp_flags); i++) {
> - tcp6_flags_ip[i] = ip6;
> - tcp4_flags_ip[i] = iph;
> - tcp_flags[i].th.doff = sizeof(struct tcphdr) / 4;
> - tcp_flags[i].th.ack = 1;
> - }
> -
> for (i = 0; i < TCP_FRAMES_MEM; i++) {
> struct iovec *iov = tcp_l2_iov[i];
>
> @@ -149,14 +116,6 @@ void tcp_sock_iov_init(const struct ctx *c)
> iov[TCP_IOV_ETH].iov_len = sizeof(struct ethhdr);
> iov[TCP_IOV_PAYLOAD].iov_base = &tcp_payload[i];
> }
> -
> - for (i = 0; i < TCP_FRAMES_MEM; i++) {
> - struct iovec *iov = tcp_l2_flags_iov[i];
> -
> - iov[TCP_IOV_TAP] = tap_hdr_iov(c, &tcp_flags_tap_hdr[i]);
> - iov[TCP_IOV_ETH].iov_len = sizeof(struct ethhdr);
> - iov[TCP_IOV_PAYLOAD].iov_base = &tcp_flags[i];
> - }
> }
>
> /**
> @@ -165,9 +124,7 @@ void tcp_sock_iov_init(const struct ctx *c)
> */
> void tcp_flags_flush(const struct ctx *c)
> {
> - tap_send_frames(c, &tcp_l2_flags_iov[0][0], TCP_NUM_IOVS,
> - tcp_flags_used);
> - tcp_flags_used = 0;
> + tcp_payload_flush(c);
Should this still be called tcp_payload_flush() instead of, say,
tcp_packets_flush() or tcp_frames_flush()? Does it make sense to
keep tcp_flags_flush() at all?
> }
>
> /**
> @@ -225,37 +182,35 @@ void tcp_payload_flush(const struct ctx *c)
> */
> int tcp_buf_send_flag(const struct ctx *c, struct tcp_tap_conn *conn, int flags)
> {
> - struct tcp_flags_t *payload;
> + struct tcp_payload_t *payload;
Should the struct still be defined as tcp_payload_t (instead of
tcp_packet_t or tcp_frame_t) and this still be called payload?
> struct iovec *iov;
> size_t optlen;
> size_t l4len;
> uint32_t seq;
> int ret;
>
> - iov = tcp_l2_flags_iov[tcp_flags_used];
> + iov = tcp_l2_iov[tcp_payload_used];
> if (CONN_V4(conn)) {
> - iov[TCP_IOV_IP] = IOV_OF_LVALUE(tcp4_flags_ip[tcp_flags_used]);
> + iov[TCP_IOV_IP] = IOV_OF_LVALUE(tcp4_payload_ip[tcp_payload_used]);
Same with this counter.
> iov[TCP_IOV_ETH].iov_base = &tcp4_eth_src;
> } else {
> - iov[TCP_IOV_IP] = IOV_OF_LVALUE(tcp6_flags_ip[tcp_flags_used]);
> + iov[TCP_IOV_IP] = IOV_OF_LVALUE(tcp6_payload_ip[tcp_payload_used]);
> iov[TCP_IOV_ETH].iov_base = &tcp6_eth_src;
> }
>
> payload = iov[TCP_IOV_PAYLOAD].iov_base;
> seq = conn->seq_to_tap;
> ret = tcp_prepare_flags(c, conn, flags, &payload->th,
> - payload->opts, &optlen);
> + payload->data, &optlen);
> if (ret <= 0)
> return ret;
>
> - tcp_flags_used++;
> + tcp_payload_used++;
> l4len = tcp_l2_buf_fill_headers(conn, iov, optlen, NULL, seq, false);
> iov[TCP_IOV_PAYLOAD].iov_len = l4len;
...with this, we still have 64k reserved just to write (typically) 20
bytes. I think that, for these frames, we could avoid wasting all this
memory by using a separate set of iov items and setting pointers as
needed.
> -
> if (flags & DUP_ACK) {
> - struct iovec *dup_iov;
> + struct iovec *dup_iov = tcp_l2_iov[tcp_payload_used++];
>
> - dup_iov = tcp_l2_flags_iov[tcp_flags_used++];
> memcpy(dup_iov[TCP_IOV_TAP].iov_base, iov[TCP_IOV_TAP].iov_base,
> iov[TCP_IOV_TAP].iov_len);
> dup_iov[TCP_IOV_ETH].iov_base = iov[TCP_IOV_ETH].iov_base;
> @@ -265,7 +220,7 @@ int tcp_buf_send_flag(const struct ctx *c, struct tcp_tap_conn *conn, int flags)
> dup_iov[TCP_IOV_PAYLOAD].iov_len = l4len;
> }
>
> - if (tcp_flags_used > TCP_FRAMES_MEM - 2)
> + if (tcp_payload_used > TCP_FRAMES_MEM - 2)
> tcp_flags_flush(c);
>
> return 0;
> @@ -282,8 +237,10 @@ int tcp_buf_send_flag(const struct ctx *c, struct tcp_tap_conn *conn, int flags)
> static void tcp_data_to_tap(const struct ctx *c, struct tcp_tap_conn *conn,
> ssize_t dlen, int no_csum, uint32_t seq)
> {
> + struct tcp_payload_t *payload;
> const uint16_t *check = NULL;
> struct iovec *iov;
> + uint16_t *flags;
> size_t l4len;
>
> conn->seq_to_tap = seq + dlen;
> @@ -302,6 +259,9 @@ static void tcp_data_to_tap(const struct ctx *c, struct tcp_tap_conn *conn,
> iov[TCP_IOV_IP] = IOV_OF_LVALUE(tcp6_payload_ip[tcp_payload_used]);
> iov[TCP_IOV_ETH].iov_base = &tcp6_eth_src;
> }
> + payload = iov[TCP_IOV_PAYLOAD].iov_base;
> + flags = &payload->th.window - 1;
I guess using offsetof(struct tcp_hdr, doff) is a bit less mysterious.
> + *(flags) = PAYLOAD_FLAGS;
Excess parentheses.
> l4len = tcp_l2_buf_fill_headers(conn, iov, dlen, check, seq, false);
> iov[TCP_IOV_PAYLOAD].iov_len = l4len;
> if (++tcp_payload_used > TCP_FRAMES_MEM - 1)
--
Stefano
next prev parent reply other threads:[~2024-09-22 6:45 UTC|newest]
Thread overview: 3+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-09-21 19:43 [PATCH] tcp: unify payload and flags l2 frames array Jon Maloy
2024-09-22 6:45 ` Stefano Brivio [this message]
2024-09-23 2:03 ` David Gibson
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20240922084508.04fe7206@elisabeth \
--to=sbrivio@redhat.com \
--cc=dgibson@redhat.com \
--cc=jmaloy@redhat.com \
--cc=lvivier@redhat.com \
--cc=passt-dev@passt.top \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
Code repositories for project(s) associated with this public inbox
https://passt.top/passt
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for IMAP folder(s).