public inbox for passt-dev@passt.top
 help / color / mirror / code / Atom feed
From: Stefano Brivio <sbrivio@redhat.com>
To: passt-dev@passt.top
Cc: David Gibson <david@gibson.dropbear.id.au>,
	Laurent Vivier <lvivier@redhat.com>
Subject: [PATCH v2 4/5] tcp, udp: Pad batched frames to 60 bytes (802.3 minimum) in non-vhost-user modes
Date: Fri,  5 Dec 2025 01:51:56 +0100	[thread overview]
Message-ID: <20251205005157.2577523-5-sbrivio@redhat.com> (raw)
In-Reply-To: <20251205005157.2577523-1-sbrivio@redhat.com>

Add a further iovec frame part, TCP_IOV_ETH_PAD for TCP and
UDP_IOV_ETH_PAD for UDP, after the payload. Make it point to a
zero-filled buffer, and send out as much of it as needed to reach
the minimum frame length given by 802.3, that is, 60 bytes altogether.

The frames we might need to pad are IPv4 only: with the larger IPv6
header, even an empty UDP datagram is 14 + 40 + 8 = 62 bytes long.
They are typically TCP ACK segments, but can also be small data
segments or datagrams.

Link: https://bugs.passt.top/show_bug.cgi?id=166
Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
Reviewed-by: Laurent Vivier <lvivier@redhat.com>
---
 tcp_buf.c      | 23 +++++++++++++++++++++++
 tcp_internal.h |  2 ++
 udp.c          | 21 +++++++++++++++++++++
 util.c         |  3 +++
 util.h         |  3 +++
 5 files changed, 52 insertions(+)

diff --git a/tcp_buf.c b/tcp_buf.c
index 2058225..5d419d3 100644
--- a/tcp_buf.c
+++ b/tcp_buf.c
@@ -96,6 +96,7 @@ void tcp_sock_iov_init(const struct ctx *c)
 		iov[TCP_IOV_TAP] = tap_hdr_iov(c, &tcp_payload_tap_hdr[i]);
 		iov[TCP_IOV_ETH].iov_len = sizeof(struct ethhdr);
 		iov[TCP_IOV_PAYLOAD].iov_base = &tcp_payload[i];
+		iov[TCP_IOV_ETH_PAD].iov_base = eth_pad;
 	}
 }
 
@@ -144,6 +145,22 @@ void tcp_payload_flush(const struct ctx *c)
 	tcp_payload_used = 0;
 }
 
+/**
+ * tcp_l2_buf_pad() - Set padding part length so that frame reaches ETH_ZLEN
+ * @iov:	Frame parts about to be sent, TCP_IOV_ETH_PAD length is updated
+ */
+static void tcp_l2_buf_pad(struct iovec *iov)
+{
+	size_t l2len = iov[TCP_IOV_ETH].iov_len +
+		       iov[TCP_IOV_IP].iov_len +
+		       iov[TCP_IOV_PAYLOAD].iov_len; /* TCP_IOV_TAP not counted */
+
+	if (l2len < ETH_ZLEN)
+		iov[TCP_IOV_ETH_PAD].iov_len = ETH_ZLEN - l2len;
+	else
+		iov[TCP_IOV_ETH_PAD].iov_len = 0;
+}
+
 /**
  * tcp_l2_buf_fill_headers() - Fill 802.3, IP, TCP headers in pre-cooked buffers
  * @c:		Execution context
@@ -212,6 +229,8 @@ int tcp_buf_send_flag(const struct ctx *c, struct tcp_tap_conn *conn, int flags)
 	iov[TCP_IOV_PAYLOAD].iov_len = l4len;
 	tcp_l2_buf_fill_headers(c, conn, iov, NULL, seq, false);
 
+	tcp_l2_buf_pad(iov);
+
 	if (flags & DUP_ACK) {
 		struct iovec *dup_iov = tcp_l2_iov[tcp_payload_used];
 		tcp_frame_conns[tcp_payload_used++] = conn;
@@ -223,6 +242,7 @@ int tcp_buf_send_flag(const struct ctx *c, struct tcp_tap_conn *conn, int flags)
 		memcpy(dup_iov[TCP_IOV_PAYLOAD].iov_base,
 		       iov[TCP_IOV_PAYLOAD].iov_base, l4len);
 		dup_iov[TCP_IOV_PAYLOAD].iov_len = l4len;
+		dup_iov[TCP_IOV_ETH_PAD].iov_len = iov[TCP_IOV_ETH_PAD].iov_len;
 	}
 
 	if (tcp_payload_used > TCP_FRAMES_MEM - 2)
@@ -270,6 +290,9 @@ static void tcp_data_to_tap(const struct ctx *c, struct tcp_tap_conn *conn,
 	payload->th.psh = push;
 	iov[TCP_IOV_PAYLOAD].iov_len = dlen + sizeof(struct tcphdr);
 	tcp_l2_buf_fill_headers(c, conn, iov, check, seq, false);
+
+	tcp_l2_buf_pad(iov);
+
 	if (++tcp_payload_used > TCP_FRAMES_MEM - 1)
 		tcp_payload_flush(c);
 }
diff --git a/tcp_internal.h b/tcp_internal.h
index 19e8922..5f8fb35 100644
--- a/tcp_internal.h
+++ b/tcp_internal.h
@@ -63,6 +63,7 @@
  * @TCP_IOV_ETH		Ethernet header
  * @TCP_IOV_IP		IP (v4/v6) header
  * @TCP_IOV_PAYLOAD	IP payload (TCP header + data)
+ * @TCP_IOV_ETH_PAD	Ethernet (802.3) padding to 60 bytes
  * @TCP_NUM_IOVS 	the number of entries in the iovec array
  */
 enum tcp_iov_parts {
@@ -70,6 +71,7 @@ enum tcp_iov_parts {
 	TCP_IOV_ETH	= 1,
 	TCP_IOV_IP	= 2,
 	TCP_IOV_PAYLOAD	= 3,
+	TCP_IOV_ETH_PAD	= 4,
 	TCP_NUM_IOVS
 };
 
diff --git a/udp.c b/udp.c
index b93c18b..f32f553 100644
--- a/udp.c
+++ b/udp.c
@@ -168,6 +168,7 @@ udp_meta[UDP_MAX_FRAMES];
  * @UDP_IOV_ETH		Ethernet header
  * @UDP_IOV_IP		IP (v4/v6) header
  * @UDP_IOV_PAYLOAD	IP payload (UDP header + data)
+ * @UDP_IOV_ETH_PAD	Ethernet (802.3) padding to 60 bytes
  * @UDP_NUM_IOVS	the number of entries in the iovec array
  */
 enum udp_iov_idx {
@@ -175,6 +176,7 @@ enum udp_iov_idx {
 	UDP_IOV_ETH,
 	UDP_IOV_IP,
 	UDP_IOV_PAYLOAD,
+	UDP_IOV_ETH_PAD,
 	UDP_NUM_IOVS,
 };
 
@@ -239,6 +241,7 @@ static void udp_iov_init_one(const struct ctx *c, size_t i)
 	tiov[UDP_IOV_ETH] = IOV_OF_LVALUE(udp_eth_hdr[i]);
 	tiov[UDP_IOV_TAP] = tap_hdr_iov(c, &meta->taph);
 	tiov[UDP_IOV_PAYLOAD].iov_base = payload;
+	tiov[UDP_IOV_ETH_PAD].iov_base = eth_pad;
 
 	mh->msg_iov	= siov;
 	mh->msg_iovlen	= 1;
@@ -344,6 +347,22 @@ size_t udp_update_hdr6(struct ipv6hdr *ip6h, struct udp_payload_t *bp,
 	return l4len;
 }
 
+/**
+ * udp_tap_pad() - Set padding part length so that frame reaches ETH_ZLEN
+ * @iov:	Frame parts about to be sent, UDP_IOV_ETH_PAD length is updated
+ */
+static void udp_tap_pad(struct iovec *iov)
+{
+	size_t l2len = iov[UDP_IOV_ETH].iov_len +
+		       iov[UDP_IOV_IP].iov_len +
+		       iov[UDP_IOV_PAYLOAD].iov_len; /* UDP_IOV_TAP not counted */
+
+	if (l2len < ETH_ZLEN)
+		iov[UDP_IOV_ETH_PAD].iov_len = ETH_ZLEN - l2len;
+	else
+		iov[UDP_IOV_ETH_PAD].iov_len = 0;
+}
+
 /**
  * udp_tap_prepare() - Convert one datagram into a tap frame
  * @mmh:	Receiving mmsghdr array
@@ -379,6 +398,8 @@ static void udp_tap_prepare(const struct mmsghdr *mmh,
 		(*tap_iov)[UDP_IOV_IP] = IOV_OF_LVALUE(bm->ip4h);
 	}
 	(*tap_iov)[UDP_IOV_PAYLOAD].iov_len = l4len;
+
+	udp_tap_pad(*tap_iov);
 }
 
 /**
diff --git a/util.c b/util.c
index 590373d..d2039df 100644
--- a/util.c
+++ b/util.c
@@ -39,6 +39,9 @@
 #include <sys/random.h>
 #endif
 
+/* Zero-filled buffer to pad 802.3 frames, up to 60 (ETH_ZLEN) bytes */
+uint8_t eth_pad[ETH_ZLEN] = { 0 };
+
 /**
  * sock_l4_() - Create and bind socket to socket address
  * @c:		Execution context
diff --git a/util.h b/util.h
index 40de694..326012c 100644
--- a/util.h
+++ b/util.h
@@ -17,6 +17,7 @@
 #include <arpa/inet.h>
 #include <unistd.h>
 #include <sys/syscall.h>
+#include <net/ethernet.h>
 
 #include "log.h"
 
@@ -152,6 +153,8 @@ void abort_with_msg(const char *fmt, ...)
 #define ntohll(x)		(be64toh((x)))
 #define htonll(x)		(htobe64((x)))
 
+extern uint8_t eth_pad[ETH_ZLEN];
+
 /**
  * ntohl_unaligned() - Read 32-bit BE value from a possibly unaligned address
  * @p:		Pointer to the BE value in memory
-- 
2.43.0


  parent reply	other threads:[~2025-12-05  0:51 UTC|newest]

Thread overview: 11+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-12-05  0:51 [PATCH v2 0/5] Pad all inbound frames to 802.3 minimum size if needed Stefano Brivio
2025-12-05  0:51 ` [PATCH v2 1/5] tap: Pad non-batched frames to 802.3 minimum (60 bytes) " Stefano Brivio
2025-12-05  3:23   ` David Gibson
2025-12-05  0:51 ` [PATCH v2 2/5] tcp: Fix coding style for comment to enum tcp_iov_parts Stefano Brivio
2025-12-05  0:51 ` [PATCH v2 3/5] udp: Fix coding style for comment to enum udp_iov_idx Stefano Brivio
2025-12-05  0:51 ` Stefano Brivio [this message]
2025-12-05  5:48   ` [PATCH v2 4/5] tcp, udp: Pad batched frames to 60 bytes (802.3 minimum) in non-vhost-user modes Stefano Brivio
2025-12-05 11:27     ` Stefano Brivio
2025-12-05  0:51 ` [PATCH v2 5/5] tcp, udp: Pad batched frames for vhost-user modes to 60 bytes (802.3 minimum) Stefano Brivio
2025-12-05  4:07   ` David Gibson
2025-12-06  1:26     ` Stefano Brivio

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20251205005157.2577523-5-sbrivio@redhat.com \
    --to=sbrivio@redhat.com \
    --cc=david@gibson.dropbear.id.au \
    --cc=lvivier@redhat.com \
    --cc=passt-dev@passt.top \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link.

Be sure your reply has a Subject: header at the top and a blank line
before the message body.

Code repositories for project(s) associated with this public inbox

	https://passt.top/passt

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for IMAP folder(s).