From: David Gibson <david@gibson.dropbear.id.au>
To: Stefano Brivio <sbrivio@redhat.com>, passt-dev@passt.top
Cc: David Gibson <david@gibson.dropbear.id.au>
Subject: [PATCH v2 2/7] udp: Split tap-bound UDP packets into multiple buffers using io vector
Date: Wed, 1 May 2024 18:31:05 +1000 [thread overview]
Message-ID: <20240501083110.2172770-3-david@gibson.dropbear.id.au> (raw)
In-Reply-To: <20240501083110.2172770-1-david@gibson.dropbear.id.au>
When sending to the tap device, currently we assemble the headers and
payload into a single contiguous buffer. Each frame is then described by
a single struct iovec, and a batch of frames is sent to the device with
tap_send_frames().
In order to better integrate the IPv4 and IPv6 paths, we want the IP
header in a different buffer that might not be contiguous with the
payload. To prepare for that, split each tap-bound UDP packet into
multiple buffers described by an array of struct iovec. For convenience,
we use the same split that Laurent recently introduced for TCP.
This removes the last use of tap_hdr_len_(), tap_frame_base() and
tap_frame_len(), so remove those too.
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
---
tap.h | 38 ------------------------------
udp.c | 74 +++++++++++++++++++++++++++++++++++++++--------------------
2 files changed, 49 insertions(+), 63 deletions(-)
diff --git a/tap.h b/tap.h
index 9216d5af..d146d2f1 100644
--- a/tap.h
+++ b/tap.h
@@ -43,44 +43,6 @@ static inline void tap_hdr_update(struct tap_hdr *thdr, size_t l2len)
thdr->vnet_len = htonl(l2len);
}
-static inline size_t tap_hdr_len_(const struct ctx *c)
-{
- if (c->mode == MODE_PASST)
- return sizeof(struct tap_hdr);
- else
- return 0;
-}
-
-/**
- * tap_frame_base() - Find start of tap frame
- * @c: Execution context
- * @taph: Pointer to tap specific header buffer
- *
- * Returns: pointer to the start of tap frame - suitable for an
- * iov_base to be passed to tap_send_frames())
- */
-static inline void *tap_frame_base(const struct ctx *c, struct tap_hdr *taph)
-{
- return (char *)(taph + 1) - tap_hdr_len_(c);
-}
-
-/**
- * tap_frame_len() - Finalize tap frame and return total length
- * @c: Execution context
- * @taph: Tap header to finalize
- * @l2len: L2 packet length (includes L2, excludes tap specific headers)
- *
- * Returns: length of the tap frame including tap specific headers - suitable
- * for an iov_len to be passed to tap_send_frames()
- */
-static inline size_t tap_frame_len(const struct ctx *c, struct tap_hdr *taph,
- size_t l2len)
-{
- if (c->mode == MODE_PASST)
- taph->vnet_len = htonl(l2len);
- return l2len + tap_hdr_len_(c);
-}
-
struct in_addr tap_ip4_daddr(const struct ctx *c);
void tap_udp4_send(const struct ctx *c, struct in_addr src, in_port_t sport,
struct in_addr dst, in_port_t dport,
diff --git a/udp.c b/udp.c
index 7186fae9..d293bc5d 100644
--- a/udp.c
+++ b/udp.c
@@ -222,12 +222,28 @@ struct udp6_l2_buf_t {
#endif
udp6_l2_buf[UDP_MAX_FRAMES];
+/**
+ * enum udp_iov_idx - Indices for the buffers making up a single UDP frame
+ * @UDP_IOV_TAP tap specific header
+ * @UDP_IOV_ETH Ethernet header
+ * @UDP_IOV_IP IP (v4/v6) header
+ * @UDP_IOV_PAYLOAD IP payload (UDP header + data)
+ * @UDP_NUM_IOVS the number of entries in the iovec array
+ */
+enum udp_iov_idx {
+ UDP_IOV_TAP = 0,
+ UDP_IOV_ETH = 1,
+ UDP_IOV_IP = 2,
+ UDP_IOV_PAYLOAD = 3,
+ UDP_NUM_IOVS
+};
+
/* recvmmsg()/sendmmsg() data for tap */
static struct iovec udp4_l2_iov_sock [UDP_MAX_FRAMES];
static struct iovec udp6_l2_iov_sock [UDP_MAX_FRAMES];
-static struct iovec udp4_l2_iov_tap [UDP_MAX_FRAMES];
-static struct iovec udp6_l2_iov_tap [UDP_MAX_FRAMES];
+static struct iovec udp4_l2_iov_tap [UDP_MAX_FRAMES][UDP_NUM_IOVS];
+static struct iovec udp6_l2_iov_tap [UDP_MAX_FRAMES][UDP_NUM_IOVS];
static struct mmsghdr udp4_l2_mh_sock [UDP_MAX_FRAMES];
static struct mmsghdr udp6_l2_mh_sock [UDP_MAX_FRAMES];
@@ -309,7 +325,7 @@ static void udp_sock4_iov_init_one(const struct ctx *c, size_t i)
struct msghdr *mh = &udp4_l2_mh_sock[i].msg_hdr;
struct udp4_l2_buf_t *buf = &udp4_l2_buf[i];
struct iovec *siov = &udp4_l2_iov_sock[i];
- struct iovec *tiov = &udp4_l2_iov_tap[i];
+ struct iovec *tiov = udp4_l2_iov_tap[i];
*buf = (struct udp4_l2_buf_t) {
.eh = ETH_HDR_INIT(ETH_P_IP),
@@ -323,7 +339,10 @@ static void udp_sock4_iov_init_one(const struct ctx *c, size_t i)
mh->msg_iov = siov;
mh->msg_iovlen = 1;
- tiov->iov_base = tap_frame_base(c, &buf->taph);
+ tiov[UDP_IOV_TAP] = tap_hdr_iov(c, &buf->taph);
+ tiov[UDP_IOV_ETH] = IOV_OF_LVALUE(buf->eh);
+ tiov[UDP_IOV_IP] = IOV_OF_LVALUE(buf->iph);
+ tiov[UDP_IOV_PAYLOAD].iov_base = &buf->uh;
}
/**
@@ -336,7 +355,7 @@ static void udp_sock6_iov_init_one(const struct ctx *c, size_t i)
struct msghdr *mh = &udp6_l2_mh_sock[i].msg_hdr;
struct udp6_l2_buf_t *buf = &udp6_l2_buf[i];
struct iovec *siov = &udp6_l2_iov_sock[i];
- struct iovec *tiov = &udp6_l2_iov_tap[i];
+ struct iovec *tiov = udp6_l2_iov_tap[i];
*buf = (struct udp6_l2_buf_t) {
.eh = ETH_HDR_INIT(ETH_P_IPV6),
@@ -350,7 +369,10 @@ static void udp_sock6_iov_init_one(const struct ctx *c, size_t i)
mh->msg_iov = siov;
mh->msg_iovlen = 1;
- tiov->iov_base = tap_frame_base(c, &buf->taph);
+ tiov[UDP_IOV_TAP] = tap_hdr_iov(c, &buf->taph);
+ tiov[UDP_IOV_ETH] = IOV_OF_LVALUE(buf->eh);
+ tiov[UDP_IOV_IP] = IOV_OF_LVALUE(buf->ip6h);
+ tiov[UDP_IOV_PAYLOAD].iov_base = &buf->uh;
}
/**
@@ -572,13 +594,14 @@ static void udp_splice_sendfrom(const struct ctx *c, unsigned start, unsigned n,
* @dlen: Length of UDP payload
* @now: Current timestamp
*
- * Return: size of tap frame with headers
+ * Return: size of IPv4 payload (UDP header + data)
*/
static size_t udp_update_hdr4(const struct ctx *c, struct udp4_l2_buf_t *b,
in_port_t dstport, size_t dlen,
const struct timespec *now)
{
- size_t l3len = dlen + sizeof(b->iph) + sizeof(b->uh);
+ size_t l4len = dlen + sizeof(b->uh);
+ size_t l3len = l4len + sizeof(b->iph);
in_port_t srcport = ntohs(b->s_in.sin_port);
struct in_addr src = b->s_in.sin_addr;
@@ -609,9 +632,10 @@ static size_t udp_update_hdr4(const struct ctx *c, struct udp4_l2_buf_t *b,
b->uh.source = b->s_in.sin_port;
b->uh.dest = htons(dstport);
- b->uh.len = htons(dlen + sizeof(b->uh));
+ b->uh.len = htons(l4len);
- return tap_frame_len(c, &b->taph, l3len + sizeof(b->eh));
+ tap_hdr_update(&b->taph, l3len + sizeof(b->eh));
+ return l4len;
}
/**
@@ -622,7 +646,7 @@ static size_t udp_update_hdr4(const struct ctx *c, struct udp4_l2_buf_t *b,
* @dlen: Length of UDP payload
* @now: Current timestamp
*
- * Return: size of tap frame with headers
+ * Return: size of IPv6 payload (UDP header + data)
*/
static size_t udp_update_hdr6(const struct ctx *c, struct udp6_l2_buf_t *b,
in_port_t dstport, size_t dlen,
@@ -679,8 +703,8 @@ static size_t udp_update_hdr6(const struct ctx *c, struct udp6_l2_buf_t *b,
b->uh.len = b->ip6h.payload_len;
csum_udp6(&b->uh, src, dst, b->data, dlen);
- return tap_frame_len(c, &b->taph, l4len +
- sizeof(b->ip6h) + sizeof(b->eh));
+ tap_hdr_update(&b->taph, l4len + sizeof(b->ip6h) + sizeof(b->eh));
+ return l4len;
}
/**
@@ -698,8 +722,8 @@ static void udp_tap_send(const struct ctx *c,
unsigned int start, unsigned int n,
in_port_t dstport, bool v6, const struct timespec *now)
{
- struct iovec *tap_iov;
- unsigned int i;
+ struct iovec (*tap_iov)[UDP_NUM_IOVS];
+ size_t i;
if (v6)
tap_iov = udp6_l2_iov_tap;
@@ -707,19 +731,19 @@ static void udp_tap_send(const struct ctx *c,
tap_iov = udp4_l2_iov_tap;
for (i = start; i < start + n; i++) {
- size_t buf_len;
-
- if (v6)
- buf_len = udp_update_hdr6(c, &udp6_l2_buf[i], dstport,
- udp6_l2_mh_sock[i].msg_len, now);
- else
- buf_len = udp_update_hdr4(c, &udp4_l2_buf[i], dstport,
- udp4_l2_mh_sock[i].msg_len, now);
+ size_t l4len;
- tap_iov[i].iov_len = buf_len;
+ if (v6) {
+ l4len = udp_update_hdr6(c, &udp6_l2_buf[i], dstport,
+ udp6_l2_mh_sock[i].msg_len, now);
+ } else {
+ l4len = udp_update_hdr4(c, &udp4_l2_buf[i], dstport,
+ udp4_l2_mh_sock[i].msg_len, now);
+ }
+ tap_iov[i][UDP_IOV_PAYLOAD].iov_len = l4len;
}
- tap_send_frames(c, tap_iov + start, 1, n);
+ tap_send_frames(c, &tap_iov[start][0], UDP_NUM_IOVS, n);
}
/**
--
@@ -222,12 +222,28 @@ struct udp6_l2_buf_t {
#endif
udp6_l2_buf[UDP_MAX_FRAMES];
+/**
+ * enum udp_iov_idx - Indices for the buffers making up a single UDP frame
+ * @UDP_IOV_TAP tap specific header
+ * @UDP_IOV_ETH Ethernet header
+ * @UDP_IOV_IP IP (v4/v6) header
+ * @UDP_IOV_PAYLOAD IP payload (UDP header + data)
+ * @UDP_NUM_IOVS the number of entries in the iovec array
+ */
+enum udp_iov_idx {
+ UDP_IOV_TAP = 0,
+ UDP_IOV_ETH = 1,
+ UDP_IOV_IP = 2,
+ UDP_IOV_PAYLOAD = 3,
+ UDP_NUM_IOVS
+};
+
/* recvmmsg()/sendmmsg() data for tap */
static struct iovec udp4_l2_iov_sock [UDP_MAX_FRAMES];
static struct iovec udp6_l2_iov_sock [UDP_MAX_FRAMES];
-static struct iovec udp4_l2_iov_tap [UDP_MAX_FRAMES];
-static struct iovec udp6_l2_iov_tap [UDP_MAX_FRAMES];
+static struct iovec udp4_l2_iov_tap [UDP_MAX_FRAMES][UDP_NUM_IOVS];
+static struct iovec udp6_l2_iov_tap [UDP_MAX_FRAMES][UDP_NUM_IOVS];
static struct mmsghdr udp4_l2_mh_sock [UDP_MAX_FRAMES];
static struct mmsghdr udp6_l2_mh_sock [UDP_MAX_FRAMES];
@@ -309,7 +325,7 @@ static void udp_sock4_iov_init_one(const struct ctx *c, size_t i)
struct msghdr *mh = &udp4_l2_mh_sock[i].msg_hdr;
struct udp4_l2_buf_t *buf = &udp4_l2_buf[i];
struct iovec *siov = &udp4_l2_iov_sock[i];
- struct iovec *tiov = &udp4_l2_iov_tap[i];
+ struct iovec *tiov = udp4_l2_iov_tap[i];
*buf = (struct udp4_l2_buf_t) {
.eh = ETH_HDR_INIT(ETH_P_IP),
@@ -323,7 +339,10 @@ static void udp_sock4_iov_init_one(const struct ctx *c, size_t i)
mh->msg_iov = siov;
mh->msg_iovlen = 1;
- tiov->iov_base = tap_frame_base(c, &buf->taph);
+ tiov[UDP_IOV_TAP] = tap_hdr_iov(c, &buf->taph);
+ tiov[UDP_IOV_ETH] = IOV_OF_LVALUE(buf->eh);
+ tiov[UDP_IOV_IP] = IOV_OF_LVALUE(buf->iph);
+ tiov[UDP_IOV_PAYLOAD].iov_base = &buf->uh;
}
/**
@@ -336,7 +355,7 @@ static void udp_sock6_iov_init_one(const struct ctx *c, size_t i)
struct msghdr *mh = &udp6_l2_mh_sock[i].msg_hdr;
struct udp6_l2_buf_t *buf = &udp6_l2_buf[i];
struct iovec *siov = &udp6_l2_iov_sock[i];
- struct iovec *tiov = &udp6_l2_iov_tap[i];
+ struct iovec *tiov = udp6_l2_iov_tap[i];
*buf = (struct udp6_l2_buf_t) {
.eh = ETH_HDR_INIT(ETH_P_IPV6),
@@ -350,7 +369,10 @@ static void udp_sock6_iov_init_one(const struct ctx *c, size_t i)
mh->msg_iov = siov;
mh->msg_iovlen = 1;
- tiov->iov_base = tap_frame_base(c, &buf->taph);
+ tiov[UDP_IOV_TAP] = tap_hdr_iov(c, &buf->taph);
+ tiov[UDP_IOV_ETH] = IOV_OF_LVALUE(buf->eh);
+ tiov[UDP_IOV_IP] = IOV_OF_LVALUE(buf->ip6h);
+ tiov[UDP_IOV_PAYLOAD].iov_base = &buf->uh;
}
/**
@@ -572,13 +594,14 @@ static void udp_splice_sendfrom(const struct ctx *c, unsigned start, unsigned n,
* @dlen: Length of UDP payload
* @now: Current timestamp
*
- * Return: size of tap frame with headers
+ * Return: size of IPv4 payload (UDP header + data)
*/
static size_t udp_update_hdr4(const struct ctx *c, struct udp4_l2_buf_t *b,
in_port_t dstport, size_t dlen,
const struct timespec *now)
{
- size_t l3len = dlen + sizeof(b->iph) + sizeof(b->uh);
+ size_t l4len = dlen + sizeof(b->uh);
+ size_t l3len = l4len + sizeof(b->iph);
in_port_t srcport = ntohs(b->s_in.sin_port);
struct in_addr src = b->s_in.sin_addr;
@@ -609,9 +632,10 @@ static size_t udp_update_hdr4(const struct ctx *c, struct udp4_l2_buf_t *b,
b->uh.source = b->s_in.sin_port;
b->uh.dest = htons(dstport);
- b->uh.len = htons(dlen + sizeof(b->uh));
+ b->uh.len = htons(l4len);
- return tap_frame_len(c, &b->taph, l3len + sizeof(b->eh));
+ tap_hdr_update(&b->taph, l3len + sizeof(b->eh));
+ return l4len;
}
/**
@@ -622,7 +646,7 @@ static size_t udp_update_hdr4(const struct ctx *c, struct udp4_l2_buf_t *b,
* @dlen: Length of UDP payload
* @now: Current timestamp
*
- * Return: size of tap frame with headers
+ * Return: size of IPv6 payload (UDP header + data)
*/
static size_t udp_update_hdr6(const struct ctx *c, struct udp6_l2_buf_t *b,
in_port_t dstport, size_t dlen,
@@ -679,8 +703,8 @@ static size_t udp_update_hdr6(const struct ctx *c, struct udp6_l2_buf_t *b,
b->uh.len = b->ip6h.payload_len;
csum_udp6(&b->uh, src, dst, b->data, dlen);
- return tap_frame_len(c, &b->taph, l4len +
- sizeof(b->ip6h) + sizeof(b->eh));
+ tap_hdr_update(&b->taph, l4len + sizeof(b->ip6h) + sizeof(b->eh));
+ return l4len;
}
/**
@@ -698,8 +722,8 @@ static void udp_tap_send(const struct ctx *c,
unsigned int start, unsigned int n,
in_port_t dstport, bool v6, const struct timespec *now)
{
- struct iovec *tap_iov;
- unsigned int i;
+ struct iovec (*tap_iov)[UDP_NUM_IOVS];
+ size_t i;
if (v6)
tap_iov = udp6_l2_iov_tap;
@@ -707,19 +731,19 @@ static void udp_tap_send(const struct ctx *c,
tap_iov = udp4_l2_iov_tap;
for (i = start; i < start + n; i++) {
- size_t buf_len;
-
- if (v6)
- buf_len = udp_update_hdr6(c, &udp6_l2_buf[i], dstport,
- udp6_l2_mh_sock[i].msg_len, now);
- else
- buf_len = udp_update_hdr4(c, &udp4_l2_buf[i], dstport,
- udp4_l2_mh_sock[i].msg_len, now);
+ size_t l4len;
- tap_iov[i].iov_len = buf_len;
+ if (v6) {
+ l4len = udp_update_hdr6(c, &udp6_l2_buf[i], dstport,
+ udp6_l2_mh_sock[i].msg_len, now);
+ } else {
+ l4len = udp_update_hdr4(c, &udp4_l2_buf[i], dstport,
+ udp4_l2_mh_sock[i].msg_len, now);
+ }
+ tap_iov[i][UDP_IOV_PAYLOAD].iov_len = l4len;
}
- tap_send_frames(c, tap_iov + start, 1, n);
+ tap_send_frames(c, &tap_iov[start][0], UDP_NUM_IOVS, n);
}
/**
--
2.44.0
next prev parent reply other threads:[~2024-05-01 8:31 UTC|newest]
Thread overview: 9+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-05-01 8:31 [PATCH v2 0/7] Rework UDP buffers David Gibson
2024-05-01 8:31 ` [PATCH v2 1/7] test: Allow sftp via vsock-ssh in tests David Gibson
2024-05-01 8:31 ` David Gibson [this message]
2024-05-01 8:31 ` [PATCH v2 3/7] udp: Combine initialisation of IPv4 and IPv6 iovs David Gibson
2024-05-01 8:31 ` [PATCH v2 4/7] udp: Explicitly set checksum in guest-bound UDP headers David Gibson
2024-05-01 8:31 ` [PATCH v2 5/7] udp: Share payload buffers between IPv4 and IPv6 David Gibson
2024-05-01 8:31 ` [PATCH v2 6/7] udp: Use the same buffer for the L2 header for all frames David Gibson
2024-05-01 8:31 ` [PATCH v2 7/7] udp: Single buffer for IPv4, IPv6 headers and metadata David Gibson
2024-05-02 14:52 ` [PATCH v2 0/7] Rework UDP buffers Stefano Brivio
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20240501083110.2172770-3-david@gibson.dropbear.id.au \
--to=david@gibson.dropbear.id.au \
--cc=passt-dev@passt.top \
--cc=sbrivio@redhat.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
Code repositories for project(s) associated with this public inbox
https://passt.top/passt
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for IMAP folder(s).