* [PATCH] tcp: unify payload and flags l2 frames array
@ 2024-09-21 19:43 Jon Maloy
2024-09-22 6:45 ` Stefano Brivio
2024-09-23 2:03 ` David Gibson
0 siblings, 2 replies; 3+ messages in thread
From: Jon Maloy @ 2024-09-21 19:43 UTC (permalink / raw)
To: passt-dev, sbrivio, lvivier, dgibson, jmaloy
In order to reduce memory and code footprint, we merge the array
for l2 flag frames into the one for payload frames.
This change also ensures that no flag message will be sent out
over the l2 media bypassing already queued payload messages.
Signed-off-by: Jon Maloy <jmaloy@redhat.com>
---
tcp.c | 3 +--
tcp_buf.c | 70 ++++++++++++-------------------------------------------
2 files changed, 16 insertions(+), 57 deletions(-)
diff --git a/tcp.c b/tcp.c
index 12e864f..dd060d0 100644
--- a/tcp.c
+++ b/tcp.c
@@ -868,7 +868,6 @@ bool tcp_flow_defer(const struct tcp_tap_conn *conn)
/* cppcheck-suppress [constParameterPointer, unmatchedSuppression] */
void tcp_defer_handler(struct ctx *c)
{
- tcp_flags_flush(c);
tcp_payload_flush(c);
}
@@ -1148,7 +1147,7 @@ static void tcp_update_seqack_from_tap(const struct ctx *c,
* 1 otherwise
*/
int tcp_prepare_flags(const struct ctx *c, struct tcp_tap_conn *conn,
- int flags, struct tcphdr *th, char *data,
+ int flags, struct tcphdr *th, uint8_t *data,
size_t *optlen)
{
struct tcp_info tinfo = { 0 };
diff --git a/tcp_buf.c b/tcp_buf.c
index 625c29c..58636d1 100644
--- a/tcp_buf.c
+++ b/tcp_buf.c
@@ -33,6 +33,7 @@
#include "tcp_internal.h"
#include "tcp_buf.h"
+#define PAYLOAD_FLAGS htons(0x5010) /* doff = 5, ack = 1 */
#define TCP_FRAMES_MEM 128
#define TCP_FRAMES \
(c->mode == MODE_PASTA ? 1 : TCP_FRAMES_MEM)
@@ -52,21 +53,6 @@ struct tcp_payload_t {
} __attribute__ ((packed, aligned(__alignof__(unsigned int))));
#endif
-/**
- * struct tcp_flags_t - TCP header and data to send zero-length
- * segments (flags)
- * @th: TCP header
- * @opts TCP options
- */
-struct tcp_flags_t {
- struct tcphdr th;
- char opts[OPT_MSS_LEN + OPT_WS_LEN + 1];
-#ifdef __AVX2__
-} __attribute__ ((packed, aligned(32)));
-#else
-} __attribute__ ((packed, aligned(__alignof__(unsigned int))));
-#endif
-
/* Ethernet header for IPv4 and IPv6 frames */
static struct ethhdr tcp4_eth_src;
static struct ethhdr tcp6_eth_src;
@@ -87,22 +73,10 @@ static_assert(MSS6 <= sizeof(tcp_payload[0].data), "MSS6 is greater than 65516")
static struct tcp_tap_conn *tcp_frame_conns[TCP_FRAMES_MEM];
static unsigned int tcp_payload_used;
-static struct tap_hdr tcp_flags_tap_hdr[TCP_FRAMES_MEM];
-/* IPv4 headers for TCP segment without payload */
-static struct iphdr tcp4_flags_ip[TCP_FRAMES_MEM];
-/* TCP segments without payload for IPv4 frames */
-static struct tcp_flags_t tcp_flags[TCP_FRAMES_MEM];
-
-static unsigned int tcp_flags_used;
-
-/* IPv6 headers for TCP segment without payload */
-static struct ipv6hdr tcp6_flags_ip[TCP_FRAMES_MEM];
-
/* recvmsg()/sendmsg() data for tap */
static struct iovec iov_sock [TCP_FRAMES_MEM + 1];
static struct iovec tcp_l2_iov[TCP_FRAMES_MEM][TCP_NUM_IOVS];
-static struct iovec tcp_l2_flags_iov[TCP_FRAMES_MEM][TCP_NUM_IOVS];
/**
* tcp_update_l2_buf() - Update Ethernet header buffers with addresses
@@ -135,13 +109,6 @@ void tcp_sock_iov_init(const struct ctx *c)
tcp_payload[i].th.ack = 1;
}
- for (i = 0; i < ARRAY_SIZE(tcp_flags); i++) {
- tcp6_flags_ip[i] = ip6;
- tcp4_flags_ip[i] = iph;
- tcp_flags[i].th.doff = sizeof(struct tcphdr) / 4;
- tcp_flags[i].th.ack = 1;
- }
-
for (i = 0; i < TCP_FRAMES_MEM; i++) {
struct iovec *iov = tcp_l2_iov[i];
@@ -149,14 +116,6 @@ void tcp_sock_iov_init(const struct ctx *c)
iov[TCP_IOV_ETH].iov_len = sizeof(struct ethhdr);
iov[TCP_IOV_PAYLOAD].iov_base = &tcp_payload[i];
}
-
- for (i = 0; i < TCP_FRAMES_MEM; i++) {
- struct iovec *iov = tcp_l2_flags_iov[i];
-
- iov[TCP_IOV_TAP] = tap_hdr_iov(c, &tcp_flags_tap_hdr[i]);
- iov[TCP_IOV_ETH].iov_len = sizeof(struct ethhdr);
- iov[TCP_IOV_PAYLOAD].iov_base = &tcp_flags[i];
- }
}
/**
@@ -165,9 +124,7 @@ void tcp_sock_iov_init(const struct ctx *c)
*/
void tcp_flags_flush(const struct ctx *c)
{
- tap_send_frames(c, &tcp_l2_flags_iov[0][0], TCP_NUM_IOVS,
- tcp_flags_used);
- tcp_flags_used = 0;
+ tcp_payload_flush(c);
}
/**
@@ -225,37 +182,35 @@ void tcp_payload_flush(const struct ctx *c)
*/
int tcp_buf_send_flag(const struct ctx *c, struct tcp_tap_conn *conn, int flags)
{
- struct tcp_flags_t *payload;
+ struct tcp_payload_t *payload;
struct iovec *iov;
size_t optlen;
size_t l4len;
uint32_t seq;
int ret;
- iov = tcp_l2_flags_iov[tcp_flags_used];
+ iov = tcp_l2_iov[tcp_payload_used];
if (CONN_V4(conn)) {
- iov[TCP_IOV_IP] = IOV_OF_LVALUE(tcp4_flags_ip[tcp_flags_used]);
+ iov[TCP_IOV_IP] = IOV_OF_LVALUE(tcp4_payload_ip[tcp_payload_used]);
iov[TCP_IOV_ETH].iov_base = &tcp4_eth_src;
} else {
- iov[TCP_IOV_IP] = IOV_OF_LVALUE(tcp6_flags_ip[tcp_flags_used]);
+ iov[TCP_IOV_IP] = IOV_OF_LVALUE(tcp6_payload_ip[tcp_payload_used]);
iov[TCP_IOV_ETH].iov_base = &tcp6_eth_src;
}
payload = iov[TCP_IOV_PAYLOAD].iov_base;
seq = conn->seq_to_tap;
ret = tcp_prepare_flags(c, conn, flags, &payload->th,
- payload->opts, &optlen);
+ payload->data, &optlen);
if (ret <= 0)
return ret;
- tcp_flags_used++;
+ tcp_payload_used++;
l4len = tcp_l2_buf_fill_headers(conn, iov, optlen, NULL, seq, false);
iov[TCP_IOV_PAYLOAD].iov_len = l4len;
-
if (flags & DUP_ACK) {
- struct iovec *dup_iov;
+ struct iovec *dup_iov = tcp_l2_iov[tcp_payload_used++];
- dup_iov = tcp_l2_flags_iov[tcp_flags_used++];
memcpy(dup_iov[TCP_IOV_TAP].iov_base, iov[TCP_IOV_TAP].iov_base,
iov[TCP_IOV_TAP].iov_len);
dup_iov[TCP_IOV_ETH].iov_base = iov[TCP_IOV_ETH].iov_base;
@@ -265,7 +220,7 @@ int tcp_buf_send_flag(const struct ctx *c, struct tcp_tap_conn *conn, int flags)
dup_iov[TCP_IOV_PAYLOAD].iov_len = l4len;
}
- if (tcp_flags_used > TCP_FRAMES_MEM - 2)
+ if (tcp_payload_used > TCP_FRAMES_MEM - 2)
tcp_flags_flush(c);
return 0;
@@ -282,8 +237,10 @@ int tcp_buf_send_flag(const struct ctx *c, struct tcp_tap_conn *conn, int flags)
static void tcp_data_to_tap(const struct ctx *c, struct tcp_tap_conn *conn,
ssize_t dlen, int no_csum, uint32_t seq)
{
+ struct tcp_payload_t *payload;
const uint16_t *check = NULL;
struct iovec *iov;
+ uint16_t *flags;
size_t l4len;
conn->seq_to_tap = seq + dlen;
@@ -302,6 +259,9 @@ static void tcp_data_to_tap(const struct ctx *c, struct tcp_tap_conn *conn,
iov[TCP_IOV_IP] = IOV_OF_LVALUE(tcp6_payload_ip[tcp_payload_used]);
iov[TCP_IOV_ETH].iov_base = &tcp6_eth_src;
}
+ payload = iov[TCP_IOV_PAYLOAD].iov_base;
+ flags = &payload->th.window - 1;
+ *(flags) = PAYLOAD_FLAGS;
l4len = tcp_l2_buf_fill_headers(conn, iov, dlen, check, seq, false);
iov[TCP_IOV_PAYLOAD].iov_len = l4len;
if (++tcp_payload_used > TCP_FRAMES_MEM - 1)
--
@@ -33,6 +33,7 @@
#include "tcp_internal.h"
#include "tcp_buf.h"
+#define PAYLOAD_FLAGS htons(0x5010) /* doff = 5, ack = 1 */
#define TCP_FRAMES_MEM 128
#define TCP_FRAMES \
(c->mode == MODE_PASTA ? 1 : TCP_FRAMES_MEM)
@@ -52,21 +53,6 @@ struct tcp_payload_t {
} __attribute__ ((packed, aligned(__alignof__(unsigned int))));
#endif
-/**
- * struct tcp_flags_t - TCP header and data to send zero-length
- * segments (flags)
- * @th: TCP header
- * @opts TCP options
- */
-struct tcp_flags_t {
- struct tcphdr th;
- char opts[OPT_MSS_LEN + OPT_WS_LEN + 1];
-#ifdef __AVX2__
-} __attribute__ ((packed, aligned(32)));
-#else
-} __attribute__ ((packed, aligned(__alignof__(unsigned int))));
-#endif
-
/* Ethernet header for IPv4 and IPv6 frames */
static struct ethhdr tcp4_eth_src;
static struct ethhdr tcp6_eth_src;
@@ -87,22 +73,10 @@ static_assert(MSS6 <= sizeof(tcp_payload[0].data), "MSS6 is greater than 65516")
static struct tcp_tap_conn *tcp_frame_conns[TCP_FRAMES_MEM];
static unsigned int tcp_payload_used;
-static struct tap_hdr tcp_flags_tap_hdr[TCP_FRAMES_MEM];
-/* IPv4 headers for TCP segment without payload */
-static struct iphdr tcp4_flags_ip[TCP_FRAMES_MEM];
-/* TCP segments without payload for IPv4 frames */
-static struct tcp_flags_t tcp_flags[TCP_FRAMES_MEM];
-
-static unsigned int tcp_flags_used;
-
-/* IPv6 headers for TCP segment without payload */
-static struct ipv6hdr tcp6_flags_ip[TCP_FRAMES_MEM];
-
/* recvmsg()/sendmsg() data for tap */
static struct iovec iov_sock [TCP_FRAMES_MEM + 1];
static struct iovec tcp_l2_iov[TCP_FRAMES_MEM][TCP_NUM_IOVS];
-static struct iovec tcp_l2_flags_iov[TCP_FRAMES_MEM][TCP_NUM_IOVS];
/**
* tcp_update_l2_buf() - Update Ethernet header buffers with addresses
@@ -135,13 +109,6 @@ void tcp_sock_iov_init(const struct ctx *c)
tcp_payload[i].th.ack = 1;
}
- for (i = 0; i < ARRAY_SIZE(tcp_flags); i++) {
- tcp6_flags_ip[i] = ip6;
- tcp4_flags_ip[i] = iph;
- tcp_flags[i].th.doff = sizeof(struct tcphdr) / 4;
- tcp_flags[i].th.ack = 1;
- }
-
for (i = 0; i < TCP_FRAMES_MEM; i++) {
struct iovec *iov = tcp_l2_iov[i];
@@ -149,14 +116,6 @@ void tcp_sock_iov_init(const struct ctx *c)
iov[TCP_IOV_ETH].iov_len = sizeof(struct ethhdr);
iov[TCP_IOV_PAYLOAD].iov_base = &tcp_payload[i];
}
-
- for (i = 0; i < TCP_FRAMES_MEM; i++) {
- struct iovec *iov = tcp_l2_flags_iov[i];
-
- iov[TCP_IOV_TAP] = tap_hdr_iov(c, &tcp_flags_tap_hdr[i]);
- iov[TCP_IOV_ETH].iov_len = sizeof(struct ethhdr);
- iov[TCP_IOV_PAYLOAD].iov_base = &tcp_flags[i];
- }
}
/**
@@ -165,9 +124,7 @@ void tcp_sock_iov_init(const struct ctx *c)
*/
void tcp_flags_flush(const struct ctx *c)
{
- tap_send_frames(c, &tcp_l2_flags_iov[0][0], TCP_NUM_IOVS,
- tcp_flags_used);
- tcp_flags_used = 0;
+ tcp_payload_flush(c);
}
/**
@@ -225,37 +182,35 @@ void tcp_payload_flush(const struct ctx *c)
*/
int tcp_buf_send_flag(const struct ctx *c, struct tcp_tap_conn *conn, int flags)
{
- struct tcp_flags_t *payload;
+ struct tcp_payload_t *payload;
struct iovec *iov;
size_t optlen;
size_t l4len;
uint32_t seq;
int ret;
- iov = tcp_l2_flags_iov[tcp_flags_used];
+ iov = tcp_l2_iov[tcp_payload_used];
if (CONN_V4(conn)) {
- iov[TCP_IOV_IP] = IOV_OF_LVALUE(tcp4_flags_ip[tcp_flags_used]);
+ iov[TCP_IOV_IP] = IOV_OF_LVALUE(tcp4_payload_ip[tcp_payload_used]);
iov[TCP_IOV_ETH].iov_base = &tcp4_eth_src;
} else {
- iov[TCP_IOV_IP] = IOV_OF_LVALUE(tcp6_flags_ip[tcp_flags_used]);
+ iov[TCP_IOV_IP] = IOV_OF_LVALUE(tcp6_payload_ip[tcp_payload_used]);
iov[TCP_IOV_ETH].iov_base = &tcp6_eth_src;
}
payload = iov[TCP_IOV_PAYLOAD].iov_base;
seq = conn->seq_to_tap;
ret = tcp_prepare_flags(c, conn, flags, &payload->th,
- payload->opts, &optlen);
+ payload->data, &optlen);
if (ret <= 0)
return ret;
- tcp_flags_used++;
+ tcp_payload_used++;
l4len = tcp_l2_buf_fill_headers(conn, iov, optlen, NULL, seq, false);
iov[TCP_IOV_PAYLOAD].iov_len = l4len;
-
if (flags & DUP_ACK) {
- struct iovec *dup_iov;
+ struct iovec *dup_iov = tcp_l2_iov[tcp_payload_used++];
- dup_iov = tcp_l2_flags_iov[tcp_flags_used++];
memcpy(dup_iov[TCP_IOV_TAP].iov_base, iov[TCP_IOV_TAP].iov_base,
iov[TCP_IOV_TAP].iov_len);
dup_iov[TCP_IOV_ETH].iov_base = iov[TCP_IOV_ETH].iov_base;
@@ -265,7 +220,7 @@ int tcp_buf_send_flag(const struct ctx *c, struct tcp_tap_conn *conn, int flags)
dup_iov[TCP_IOV_PAYLOAD].iov_len = l4len;
}
- if (tcp_flags_used > TCP_FRAMES_MEM - 2)
+ if (tcp_payload_used > TCP_FRAMES_MEM - 2)
tcp_flags_flush(c);
return 0;
@@ -282,8 +237,10 @@ int tcp_buf_send_flag(const struct ctx *c, struct tcp_tap_conn *conn, int flags)
static void tcp_data_to_tap(const struct ctx *c, struct tcp_tap_conn *conn,
ssize_t dlen, int no_csum, uint32_t seq)
{
+ struct tcp_payload_t *payload;
const uint16_t *check = NULL;
struct iovec *iov;
+ uint16_t *flags;
size_t l4len;
conn->seq_to_tap = seq + dlen;
@@ -302,6 +259,9 @@ static void tcp_data_to_tap(const struct ctx *c, struct tcp_tap_conn *conn,
iov[TCP_IOV_IP] = IOV_OF_LVALUE(tcp6_payload_ip[tcp_payload_used]);
iov[TCP_IOV_ETH].iov_base = &tcp6_eth_src;
}
+ payload = iov[TCP_IOV_PAYLOAD].iov_base;
+ flags = &payload->th.window - 1;
+ *(flags) = PAYLOAD_FLAGS;
l4len = tcp_l2_buf_fill_headers(conn, iov, dlen, check, seq, false);
iov[TCP_IOV_PAYLOAD].iov_len = l4len;
if (++tcp_payload_used > TCP_FRAMES_MEM - 1)
--
2.45.2
^ permalink raw reply related [flat|nested] 3+ messages in thread
* Re: [PATCH] tcp: unify payload and flags l2 frames array
2024-09-21 19:43 [PATCH] tcp: unify payload and flags l2 frames array Jon Maloy
@ 2024-09-22 6:45 ` Stefano Brivio
2024-09-23 2:03 ` David Gibson
1 sibling, 0 replies; 3+ messages in thread
From: Stefano Brivio @ 2024-09-22 6:45 UTC (permalink / raw)
To: Jon Maloy; +Cc: passt-dev, lvivier, dgibson
On Sat, 21 Sep 2024 15:43:33 -0400
Jon Maloy <jmaloy@redhat.com> wrote:
> In order to reduce memory and code footprint, we merge the array
As it is, with this one, I think we're actually wasting more memory,
because, if I understand this patch correctly, every ACK we send needs
its own 64k unused buffer.
> for l2 flag frames into the one for payload frames.
>
> This change also ensures that no flag message will be sent out
> over the l2 media bypassing already queued payload messages.
>
> Signed-off-by: Jon Maloy <jmaloy@redhat.com>
> ---
> tcp.c | 3 +--
> tcp_buf.c | 70 ++++++++++++-------------------------------------------
> 2 files changed, 16 insertions(+), 57 deletions(-)
If I apply this on top of "[PATCH v3 0/2] tcp: unify IPv4 and IPv6 tap
queues" (it doesn't apply otherwise), it doesn't build:
tcp.c:1149:5: error: conflicting types for ‘tcp_prepare_flags’; have ‘int(const struct ctx *, struct tcp_tap_conn *, int, struct tcphdr *, uint8_t *, size_t *)’ {aka ‘int(const struct ctx *, struct tcp_tap_conn *, int, struct tcphdr *, unsigned char *, long unsigned int *)’}
1149 | int tcp_prepare_flags(const struct ctx *c, struct tcp_tap_conn *conn,
| ^~~~~~~~~~~~~~~~~
In file included from tcp.c:305:
tcp_internal.h:98:5: note: previous declaration of ‘tcp_prepare_flags’ with type ‘int(const struct ctx *, struct tcp_tap_conn *, int, struct tcphdr *, char *, size_t *)’ {aka ‘int(const struct ctx *, struct tcp_tap_conn *, int, struct tcphdr *, char *, long unsigned int *)’}
98 | int tcp_prepare_flags(const struct ctx *c, struct tcp_tap_conn *conn, int flags,
| ^~~~~~~~~~~~~~~~~
tcp_buf.c: In function ‘tcp_buf_send_flag’:
tcp_buf.c:204:40: warning: pointer targets in passing argument 5 of ‘tcp_prepare_flags’ differ in signedness [-Wpointer-sign]
204 | payload->data, &optlen);
| ~~~~~~~^~~~~~
| |
| uint8_t * {aka unsigned char *}
In file included from tcp_buf.c:33:
tcp_internal.h:99:48: note: expected ‘char *’ but argument is of type ‘uint8_t *’ {aka ‘unsigned char *’}
99 | struct tcphdr *th, char *data, size_t *optlen);
| ~~~~~~^~~~
make: *** [Makefile:122: passt] Error 1
> diff --git a/tcp.c b/tcp.c
> index 12e864f..dd060d0 100644
> --- a/tcp.c
> +++ b/tcp.c
> @@ -868,7 +868,6 @@ bool tcp_flow_defer(const struct tcp_tap_conn *conn)
> /* cppcheck-suppress [constParameterPointer, unmatchedSuppression] */
> void tcp_defer_handler(struct ctx *c)
> {
> - tcp_flags_flush(c);
> tcp_payload_flush(c);
> }
>
> @@ -1148,7 +1147,7 @@ static void tcp_update_seqack_from_tap(const struct ctx *c,
> * 1 otherwise
> */
> int tcp_prepare_flags(const struct ctx *c, struct tcp_tap_conn *conn,
> - int flags, struct tcphdr *th, char *data,
> + int flags, struct tcphdr *th, uint8_t *data,
> size_t *optlen)
> {
> struct tcp_info tinfo = { 0 };
> diff --git a/tcp_buf.c b/tcp_buf.c
> index 625c29c..58636d1 100644
> --- a/tcp_buf.c
> +++ b/tcp_buf.c
> @@ -33,6 +33,7 @@
> #include "tcp_internal.h"
> #include "tcp_buf.h"
>
> +#define PAYLOAD_FLAGS htons(0x5010) /* doff = 5, ack = 1 */
> #define TCP_FRAMES_MEM 128
> #define TCP_FRAMES \
> (c->mode == MODE_PASTA ? 1 : TCP_FRAMES_MEM)
> @@ -52,21 +53,6 @@ struct tcp_payload_t {
> } __attribute__ ((packed, aligned(__alignof__(unsigned int))));
> #endif
>
> -/**
> - * struct tcp_flags_t - TCP header and data to send zero-length
> - * segments (flags)
> - * @th: TCP header
> - * @opts TCP options
> - */
> -struct tcp_flags_t {
> - struct tcphdr th;
> - char opts[OPT_MSS_LEN + OPT_WS_LEN + 1];
> -#ifdef __AVX2__
> -} __attribute__ ((packed, aligned(32)));
> -#else
> -} __attribute__ ((packed, aligned(__alignof__(unsigned int))));
> -#endif
> -
> /* Ethernet header for IPv4 and IPv6 frames */
> static struct ethhdr tcp4_eth_src;
> static struct ethhdr tcp6_eth_src;
> @@ -87,22 +73,10 @@ static_assert(MSS6 <= sizeof(tcp_payload[0].data), "MSS6 is greater than 65516")
> static struct tcp_tap_conn *tcp_frame_conns[TCP_FRAMES_MEM];
> static unsigned int tcp_payload_used;
>
> -static struct tap_hdr tcp_flags_tap_hdr[TCP_FRAMES_MEM];
> -/* IPv4 headers for TCP segment without payload */
> -static struct iphdr tcp4_flags_ip[TCP_FRAMES_MEM];
> -/* TCP segments without payload for IPv4 frames */
> -static struct tcp_flags_t tcp_flags[TCP_FRAMES_MEM];
> -
> -static unsigned int tcp_flags_used;
> -
> -/* IPv6 headers for TCP segment without payload */
> -static struct ipv6hdr tcp6_flags_ip[TCP_FRAMES_MEM];
> -
> /* recvmsg()/sendmsg() data for tap */
> static struct iovec iov_sock [TCP_FRAMES_MEM + 1];
>
> static struct iovec tcp_l2_iov[TCP_FRAMES_MEM][TCP_NUM_IOVS];
> -static struct iovec tcp_l2_flags_iov[TCP_FRAMES_MEM][TCP_NUM_IOVS];
>
> /**
> * tcp_update_l2_buf() - Update Ethernet header buffers with addresses
> @@ -135,13 +109,6 @@ void tcp_sock_iov_init(const struct ctx *c)
> tcp_payload[i].th.ack = 1;
> }
>
> - for (i = 0; i < ARRAY_SIZE(tcp_flags); i++) {
> - tcp6_flags_ip[i] = ip6;
> - tcp4_flags_ip[i] = iph;
> - tcp_flags[i].th.doff = sizeof(struct tcphdr) / 4;
> - tcp_flags[i].th.ack = 1;
> - }
> -
> for (i = 0; i < TCP_FRAMES_MEM; i++) {
> struct iovec *iov = tcp_l2_iov[i];
>
> @@ -149,14 +116,6 @@ void tcp_sock_iov_init(const struct ctx *c)
> iov[TCP_IOV_ETH].iov_len = sizeof(struct ethhdr);
> iov[TCP_IOV_PAYLOAD].iov_base = &tcp_payload[i];
> }
> -
> - for (i = 0; i < TCP_FRAMES_MEM; i++) {
> - struct iovec *iov = tcp_l2_flags_iov[i];
> -
> - iov[TCP_IOV_TAP] = tap_hdr_iov(c, &tcp_flags_tap_hdr[i]);
> - iov[TCP_IOV_ETH].iov_len = sizeof(struct ethhdr);
> - iov[TCP_IOV_PAYLOAD].iov_base = &tcp_flags[i];
> - }
> }
>
> /**
> @@ -165,9 +124,7 @@ void tcp_sock_iov_init(const struct ctx *c)
> */
> void tcp_flags_flush(const struct ctx *c)
> {
> - tap_send_frames(c, &tcp_l2_flags_iov[0][0], TCP_NUM_IOVS,
> - tcp_flags_used);
> - tcp_flags_used = 0;
> + tcp_payload_flush(c);
Should this still be called tcp_payload_flush() instead of, say,
tcp_packets_flush() or tcp_frames_flush()? Does it make sense to
keep tcp_flags_flush() at all?
> }
>
> /**
> @@ -225,37 +182,35 @@ void tcp_payload_flush(const struct ctx *c)
> */
> int tcp_buf_send_flag(const struct ctx *c, struct tcp_tap_conn *conn, int flags)
> {
> - struct tcp_flags_t *payload;
> + struct tcp_payload_t *payload;
Should the struct still be defined as tcp_payload_t (instead of
tcp_packet_t or tcp_frame_t) and this still be called payload?
> struct iovec *iov;
> size_t optlen;
> size_t l4len;
> uint32_t seq;
> int ret;
>
> - iov = tcp_l2_flags_iov[tcp_flags_used];
> + iov = tcp_l2_iov[tcp_payload_used];
> if (CONN_V4(conn)) {
> - iov[TCP_IOV_IP] = IOV_OF_LVALUE(tcp4_flags_ip[tcp_flags_used]);
> + iov[TCP_IOV_IP] = IOV_OF_LVALUE(tcp4_payload_ip[tcp_payload_used]);
Same with this counter.
> iov[TCP_IOV_ETH].iov_base = &tcp4_eth_src;
> } else {
> - iov[TCP_IOV_IP] = IOV_OF_LVALUE(tcp6_flags_ip[tcp_flags_used]);
> + iov[TCP_IOV_IP] = IOV_OF_LVALUE(tcp6_payload_ip[tcp_payload_used]);
> iov[TCP_IOV_ETH].iov_base = &tcp6_eth_src;
> }
>
> payload = iov[TCP_IOV_PAYLOAD].iov_base;
> seq = conn->seq_to_tap;
> ret = tcp_prepare_flags(c, conn, flags, &payload->th,
> - payload->opts, &optlen);
> + payload->data, &optlen);
> if (ret <= 0)
> return ret;
>
> - tcp_flags_used++;
> + tcp_payload_used++;
> l4len = tcp_l2_buf_fill_headers(conn, iov, optlen, NULL, seq, false);
> iov[TCP_IOV_PAYLOAD].iov_len = l4len;
...with this, we still have 64k reserved just to write (typically) 20
bytes. I think that, for these frames, we could avoid wasting all this
memory by using a separate set of iov items and setting pointers as
needed.
> -
> if (flags & DUP_ACK) {
> - struct iovec *dup_iov;
> + struct iovec *dup_iov = tcp_l2_iov[tcp_payload_used++];
>
> - dup_iov = tcp_l2_flags_iov[tcp_flags_used++];
> memcpy(dup_iov[TCP_IOV_TAP].iov_base, iov[TCP_IOV_TAP].iov_base,
> iov[TCP_IOV_TAP].iov_len);
> dup_iov[TCP_IOV_ETH].iov_base = iov[TCP_IOV_ETH].iov_base;
> @@ -265,7 +220,7 @@ int tcp_buf_send_flag(const struct ctx *c, struct tcp_tap_conn *conn, int flags)
> dup_iov[TCP_IOV_PAYLOAD].iov_len = l4len;
> }
>
> - if (tcp_flags_used > TCP_FRAMES_MEM - 2)
> + if (tcp_payload_used > TCP_FRAMES_MEM - 2)
> tcp_flags_flush(c);
>
> return 0;
> @@ -282,8 +237,10 @@ int tcp_buf_send_flag(const struct ctx *c, struct tcp_tap_conn *conn, int flags)
> static void tcp_data_to_tap(const struct ctx *c, struct tcp_tap_conn *conn,
> ssize_t dlen, int no_csum, uint32_t seq)
> {
> + struct tcp_payload_t *payload;
> const uint16_t *check = NULL;
> struct iovec *iov;
> + uint16_t *flags;
> size_t l4len;
>
> conn->seq_to_tap = seq + dlen;
> @@ -302,6 +259,9 @@ static void tcp_data_to_tap(const struct ctx *c, struct tcp_tap_conn *conn,
> iov[TCP_IOV_IP] = IOV_OF_LVALUE(tcp6_payload_ip[tcp_payload_used]);
> iov[TCP_IOV_ETH].iov_base = &tcp6_eth_src;
> }
> + payload = iov[TCP_IOV_PAYLOAD].iov_base;
> + flags = &payload->th.window - 1;
I guess using offsetof(struct tcp_hdr, doff) is a bit less mysterious.
> + *(flags) = PAYLOAD_FLAGS;
Excess parentheses.
> l4len = tcp_l2_buf_fill_headers(conn, iov, dlen, check, seq, false);
> iov[TCP_IOV_PAYLOAD].iov_len = l4len;
> if (++tcp_payload_used > TCP_FRAMES_MEM - 1)
--
Stefano
^ permalink raw reply [flat|nested] 3+ messages in thread
* Re: [PATCH] tcp: unify payload and flags l2 frames array
2024-09-21 19:43 [PATCH] tcp: unify payload and flags l2 frames array Jon Maloy
2024-09-22 6:45 ` Stefano Brivio
@ 2024-09-23 2:03 ` David Gibson
1 sibling, 0 replies; 3+ messages in thread
From: David Gibson @ 2024-09-23 2:03 UTC (permalink / raw)
To: Jon Maloy; +Cc: passt-dev, sbrivio, lvivier, dgibson
[-- Attachment #1: Type: text/plain, Size: 7986 bytes --]
On Sat, Sep 21, 2024 at 03:43:33PM -0400, Jon Maloy wrote:
> In order to reduce memory and code footprint, we merge the array
> for l2 flag frames into the one for payload frames.
>
> This change also ensures that no flag message will be sent out
> over the l2 media bypassing already queued payload messages.
>
> Signed-off-by: Jon Maloy <jmaloy@redhat.com>
> ---
> tcp.c | 3 +--
> tcp_buf.c | 70 ++++++++++++-------------------------------------------
> 2 files changed, 16 insertions(+), 57 deletions(-)
>
> diff --git a/tcp.c b/tcp.c
> index 12e864f..dd060d0 100644
> --- a/tcp.c
> +++ b/tcp.c
> @@ -868,7 +868,6 @@ bool tcp_flow_defer(const struct tcp_tap_conn *conn)
> /* cppcheck-suppress [constParameterPointer, unmatchedSuppression] */
> void tcp_defer_handler(struct ctx *c)
> {
> - tcp_flags_flush(c);
> tcp_payload_flush(c);
As Stefano also noted, I think it would make more sense to just
combine these into a single "tcp_flush()".
> }
>
> @@ -1148,7 +1147,7 @@ static void tcp_update_seqack_from_tap(const struct ctx *c,
> * 1 otherwise
> */
> int tcp_prepare_flags(const struct ctx *c, struct tcp_tap_conn *conn,
> - int flags, struct tcphdr *th, char *data,
> + int flags, struct tcphdr *th, uint8_t *data,
This seems like an unrelated change to the rest.
> size_t *optlen)
> {
> struct tcp_info tinfo = { 0 };
> diff --git a/tcp_buf.c b/tcp_buf.c
> index 625c29c..58636d1 100644
> --- a/tcp_buf.c
> +++ b/tcp_buf.c
> @@ -33,6 +33,7 @@
> #include "tcp_internal.h"
> #include "tcp_buf.h"
>
> +#define PAYLOAD_FLAGS htons(0x5010) /* doff = 5, ack = 1 */
> #define TCP_FRAMES_MEM 128
> #define TCP_FRAMES \
> (c->mode == MODE_PASTA ? 1 : TCP_FRAMES_MEM)
> @@ -52,21 +53,6 @@ struct tcp_payload_t {
> } __attribute__ ((packed, aligned(__alignof__(unsigned int))));
> #endif
>
> -/**
> - * struct tcp_flags_t - TCP header and data to send zero-length
> - * segments (flags)
> - * @th: TCP header
> - * @opts TCP options
> - */
> -struct tcp_flags_t {
> - struct tcphdr th;
> - char opts[OPT_MSS_LEN + OPT_WS_LEN + 1];
> -#ifdef __AVX2__
> -} __attribute__ ((packed, aligned(32)));
> -#else
> -} __attribute__ ((packed, aligned(__alignof__(unsigned int))));
> -#endif
> -
> /* Ethernet header for IPv4 and IPv6 frames */
> static struct ethhdr tcp4_eth_src;
> static struct ethhdr tcp6_eth_src;
> @@ -87,22 +73,10 @@ static_assert(MSS6 <= sizeof(tcp_payload[0].data), "MSS6 is greater than 65516")
> static struct tcp_tap_conn *tcp_frame_conns[TCP_FRAMES_MEM];
> static unsigned int tcp_payload_used;
Similarly changing this just to "tcp_buf_used" or the like would make
sense.
> -static struct tap_hdr tcp_flags_tap_hdr[TCP_FRAMES_MEM];
> -/* IPv4 headers for TCP segment without payload */
> -static struct iphdr tcp4_flags_ip[TCP_FRAMES_MEM];
> -/* TCP segments without payload for IPv4 frames */
> -static struct tcp_flags_t tcp_flags[TCP_FRAMES_MEM];
> -
> -static unsigned int tcp_flags_used;
> -
> -/* IPv6 headers for TCP segment without payload */
> -static struct ipv6hdr tcp6_flags_ip[TCP_FRAMES_MEM];
> -
> /* recvmsg()/sendmsg() data for tap */
> static struct iovec iov_sock [TCP_FRAMES_MEM + 1];
>
> static struct iovec tcp_l2_iov[TCP_FRAMES_MEM][TCP_NUM_IOVS];
> -static struct iovec tcp_l2_flags_iov[TCP_FRAMES_MEM][TCP_NUM_IOVS];
>
> /**
> * tcp_update_l2_buf() - Update Ethernet header buffers with addresses
> @@ -135,13 +109,6 @@ void tcp_sock_iov_init(const struct ctx *c)
> tcp_payload[i].th.ack = 1;
> }
>
> - for (i = 0; i < ARRAY_SIZE(tcp_flags); i++) {
> - tcp6_flags_ip[i] = ip6;
> - tcp4_flags_ip[i] = iph;
> - tcp_flags[i].th.doff = sizeof(struct tcphdr) / 4;
> - tcp_flags[i].th.ack = 1;
> - }
> -
> for (i = 0; i < TCP_FRAMES_MEM; i++) {
> struct iovec *iov = tcp_l2_iov[i];
>
> @@ -149,14 +116,6 @@ void tcp_sock_iov_init(const struct ctx *c)
> iov[TCP_IOV_ETH].iov_len = sizeof(struct ethhdr);
> iov[TCP_IOV_PAYLOAD].iov_base = &tcp_payload[i];
> }
> -
> - for (i = 0; i < TCP_FRAMES_MEM; i++) {
> - struct iovec *iov = tcp_l2_flags_iov[i];
> -
> - iov[TCP_IOV_TAP] = tap_hdr_iov(c, &tcp_flags_tap_hdr[i]);
> - iov[TCP_IOV_ETH].iov_len = sizeof(struct ethhdr);
> - iov[TCP_IOV_PAYLOAD].iov_base = &tcp_flags[i];
> - }
> }
>
> /**
> @@ -165,9 +124,7 @@ void tcp_sock_iov_init(const struct ctx *c)
> */
> void tcp_flags_flush(const struct ctx *c)
> {
> - tap_send_frames(c, &tcp_l2_flags_iov[0][0], TCP_NUM_IOVS,
> - tcp_flags_used);
> - tcp_flags_used = 0;
> + tcp_payload_flush(c);
I think tcp_flags_flush() can just be removed entirely.
> }
>
> /**
> @@ -225,37 +182,35 @@ void tcp_payload_flush(const struct ctx *c)
> */
> int tcp_buf_send_flag(const struct ctx *c, struct tcp_tap_conn *conn, int flags)
> {
> - struct tcp_flags_t *payload;
> + struct tcp_payload_t *payload;
> struct iovec *iov;
> size_t optlen;
> size_t l4len;
> uint32_t seq;
> int ret;
>
> - iov = tcp_l2_flags_iov[tcp_flags_used];
> + iov = tcp_l2_iov[tcp_payload_used];
> if (CONN_V4(conn)) {
> - iov[TCP_IOV_IP] = IOV_OF_LVALUE(tcp4_flags_ip[tcp_flags_used]);
> + iov[TCP_IOV_IP] = IOV_OF_LVALUE(tcp4_payload_ip[tcp_payload_used]);
> iov[TCP_IOV_ETH].iov_base = &tcp4_eth_src;
> } else {
> - iov[TCP_IOV_IP] = IOV_OF_LVALUE(tcp6_flags_ip[tcp_flags_used]);
> + iov[TCP_IOV_IP] = IOV_OF_LVALUE(tcp6_payload_ip[tcp_payload_used]);
> iov[TCP_IOV_ETH].iov_base = &tcp6_eth_src;
> }
>
> payload = iov[TCP_IOV_PAYLOAD].iov_base;
> seq = conn->seq_to_tap;
> ret = tcp_prepare_flags(c, conn, flags, &payload->th,
> - payload->opts, &optlen);
> + payload->data, &optlen);
> if (ret <= 0)
> return ret;
>
> - tcp_flags_used++;
> + tcp_payload_used++;
> l4len = tcp_l2_buf_fill_headers(conn, iov, optlen, NULL, seq, false);
> iov[TCP_IOV_PAYLOAD].iov_len = l4len;
> -
> if (flags & DUP_ACK) {
> - struct iovec *dup_iov;
> + struct iovec *dup_iov = tcp_l2_iov[tcp_payload_used++];
>
> - dup_iov = tcp_l2_flags_iov[tcp_flags_used++];
> memcpy(dup_iov[TCP_IOV_TAP].iov_base, iov[TCP_IOV_TAP].iov_base,
> iov[TCP_IOV_TAP].iov_len);
> dup_iov[TCP_IOV_ETH].iov_base = iov[TCP_IOV_ETH].iov_base;
> @@ -265,7 +220,7 @@ int tcp_buf_send_flag(const struct ctx *c, struct tcp_tap_conn *conn, int flags)
> dup_iov[TCP_IOV_PAYLOAD].iov_len = l4len;
> }
>
> - if (tcp_flags_used > TCP_FRAMES_MEM - 2)
> + if (tcp_payload_used > TCP_FRAMES_MEM - 2)
> tcp_flags_flush(c);
>
> return 0;
> @@ -282,8 +237,10 @@ int tcp_buf_send_flag(const struct ctx *c, struct tcp_tap_conn *conn, int flags)
> static void tcp_data_to_tap(const struct ctx *c, struct tcp_tap_conn *conn,
> ssize_t dlen, int no_csum, uint32_t seq)
> {
> + struct tcp_payload_t *payload;
> const uint16_t *check = NULL;
> struct iovec *iov;
> + uint16_t *flags;
> size_t l4len;
>
> conn->seq_to_tap = seq + dlen;
> @@ -302,6 +259,9 @@ static void tcp_data_to_tap(const struct ctx *c, struct tcp_tap_conn *conn,
> iov[TCP_IOV_IP] = IOV_OF_LVALUE(tcp6_payload_ip[tcp_payload_used]);
> iov[TCP_IOV_ETH].iov_base = &tcp6_eth_src;
> }
> + payload = iov[TCP_IOV_PAYLOAD].iov_base;
> + flags = &payload->th.window - 1;
I think this will run afoul of the C strict aliasing rules (they're
disabled in the kernel, but not for us).
> + *(flags) = PAYLOAD_FLAGS;
> l4len = tcp_l2_buf_fill_headers(conn, iov, dlen, check, seq, false);
> iov[TCP_IOV_PAYLOAD].iov_len = l4len;
> if (++tcp_payload_used > TCP_FRAMES_MEM - 1)
--
David Gibson (he or they) | I'll have my music baroque, and my code
david AT gibson.dropbear.id.au | minimalist, thank you, not the other way
| around.
http://www.ozlabs.org/~dgibson
[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]
^ permalink raw reply [flat|nested] 3+ messages in thread
end of thread, other threads:[~2024-09-23 2:04 UTC | newest]
Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2024-09-21 19:43 [PATCH] tcp: unify payload and flags l2 frames array Jon Maloy
2024-09-22 6:45 ` Stefano Brivio
2024-09-23 2:03 ` David Gibson
Code repositories for project(s) associated with this public inbox
https://passt.top/passt
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for IMAP folder(s).