public inbox for passt-dev@passt.top
 help / color / mirror / code / Atom feed
* [PATCH 00/18] Introduce discontiguous frames management
@ 2025-04-02 17:23 Laurent Vivier
  2025-04-02 17:23 ` [PATCH 01/18] arp: Don't mix incoming and outgoing buffers Laurent Vivier
                   ` (17 more replies)
  0 siblings, 18 replies; 34+ messages in thread
From: Laurent Vivier @ 2025-04-02 17:23 UTC (permalink / raw)
  To: passt-dev; +Cc: Laurent Vivier

This series introduces iov_tail to convey frame information
between functions.

This is only an API change, for the moment the memory pool
is only able to store contiguous buffer, so, except for
vhost-user in a special case, we only play with iovec array
with only one entry.

Laurent Vivier (18):
  arp: Don't mix incoming and outgoing buffers
  iov: Update IOV_REMOVE_HEADER() and IOV_PEEK_HEADER()
  tap: Use iov_tail with tap_add_packet()
  packet: Use iov_tail with packet_add()
  packet: Add packet_base()
  arp: Convert to iov_tail
  ndp: Convert to iov_tail
  icmp: Convert to iov_tail
  udp: Convert to iov_tail
  tcp: Convert tcp_tap_handler() to use iov_tail
  tcp: Convert tcp_data_from_tap() to use iov_tail
  dhcpv6: Convert to iov_tail
  dhcpv6: move offset initialization out of dhcpv6_opt()
  dhcpv6: Use iov_tail in dhcpv6_opt()
  dhcp: Convert to iov_tail
  tap: Convert to iov_tail
  ip: Use iov_tail in ipv6_l4hdr()
  tap: Convert to iov_tail

 arp.c       |  90 ++++++++++++++++++++++++------------
 dhcp.c      |  38 ++++++++-------
 dhcpv6.c    |  70 +++++++++++++++-------------
 icmp.c      |  38 +++++++++------
 iov.c       | 130 ++++++++++++++++++++++++++++++++++++++++++++++++----
 iov.h       |  54 ++++++++++++++++------
 ip.c        |  27 +++++------
 ip.h        |   3 +-
 ndp.c       |   7 ++-
 packet.c    |  58 ++++++++++-------------
 packet.h    |  19 ++++----
 pcap.c      |   1 +
 tap.c       |  86 ++++++++++++++++++++++------------
 tap.h       |   2 +-
 tcp.c       |  41 +++++++++++------
 tcp_buf.c   |   2 +-
 udp.c       |  37 ++++++++++-----
 vu_common.c |  25 ++--------
 18 files changed, 474 insertions(+), 254 deletions(-)

-- 
2.49.0



^ permalink raw reply	[flat|nested] 34+ messages in thread

* [PATCH 01/18] arp: Don't mix incoming and outgoing buffers
  2025-04-02 17:23 [PATCH 00/18] Introduce discontiguous frames management Laurent Vivier
@ 2025-04-02 17:23 ` Laurent Vivier
  2025-04-03  1:57   ` David Gibson
  2025-04-02 17:23 ` [PATCH 02/18] iov: Update IOV_REMOVE_HEADER() and IOV_PEEK_HEADER() Laurent Vivier
                   ` (16 subsequent siblings)
  17 siblings, 1 reply; 34+ messages in thread
From: Laurent Vivier @ 2025-04-02 17:23 UTC (permalink / raw)
  To: passt-dev; +Cc: Laurent Vivier

Don't use the memory of the incoming packet to build the outgoing buffer
as it can be memory of the TX queue in the case of vhost-user.

Moreover with vhost-user, the packet can be splitted accross several
iovec and it's easier to rebuild it in a buffer than updating an
existing iovec array.

Signed-off-by: Laurent Vivier <lvivier@redhat.com>
---
 arp.c | 84 ++++++++++++++++++++++++++++++++++++++---------------------
 1 file changed, 55 insertions(+), 29 deletions(-)

diff --git a/arp.c b/arp.c
index fc482bbd9938..9d68d7c3b602 100644
--- a/arp.c
+++ b/arp.c
@@ -31,56 +31,82 @@
 #include "tap.h"
 
 /**
- * arp() - Check if this is a supported ARP message, reply as needed
+ * ignore_arp() - Check if this is a supported ARP message
  * @c:		Execution context
- * @p:		Packet pool, single packet with Ethernet buffer
+ * @ah:		ARP header
+ * @am:		ARP message
  *
- * Return: 1 if handled, -1 on failure
+ * Return: true if the message is supported, false otherwise.
  */
-int arp(const struct ctx *c, const struct pool *p)
+static bool ignore_arp(const struct ctx *c,
+		       const struct arphdr *ah, const struct arpmsg *am)
 {
-	unsigned char swap[4];
-	struct ethhdr *eh;
-	struct arphdr *ah;
-	struct arpmsg *am;
-	size_t l2len;
-
-	eh = packet_get(p, 0, 0,			 sizeof(*eh), NULL);
-	ah = packet_get(p, 0, sizeof(*eh),		 sizeof(*ah), NULL);
-	am = packet_get(p, 0, sizeof(*eh) + sizeof(*ah), sizeof(*am), NULL);
-
-	if (!eh || !ah || !am)
-		return -1;
-
 	if (ah->ar_hrd != htons(ARPHRD_ETHER)	||
 	    ah->ar_pro != htons(ETH_P_IP)	||
 	    ah->ar_hln != ETH_ALEN		||
 	    ah->ar_pln != 4			||
 	    ah->ar_op  != htons(ARPOP_REQUEST))
-		return 1;
+		return true;
 
 	/* Discard announcements, but not 0.0.0.0 "probes" */
 	if (memcmp(am->sip, &in4addr_any, sizeof(am->sip)) &&
 	    !memcmp(am->sip, am->tip, sizeof(am->sip)))
-		return 1;
+		return true;
 
 	/* Don't resolve the guest's assigned address, either. */
 	if (!memcmp(am->tip, &c->ip4.addr, sizeof(am->tip)))
+		return true;
+
+	return false;
+}
+
+/**
+ * arp() - Check if this is a supported ARP message, reply as needed
+ * @c:		Execution context
+ * @p:		Packet pool, single packet with Ethernet buffer
+ *
+ * Return: 1 if handled, -1 on failure
+ */
+int arp(const struct ctx *c, const struct pool *p)
+{
+	struct {
+		struct ethhdr eh;
+		struct arphdr ah;
+		struct arpmsg am;
+	} __attribute__((__packed__)) resp;
+	const struct ethhdr *eh;
+	const struct arphdr *ah;
+	const struct arpmsg *am;
+
+	eh = packet_get(p, 0, 0,			 sizeof(*eh), NULL);
+	ah = packet_get(p, 0, sizeof(*eh),		 sizeof(*ah), NULL);
+	am = packet_get(p, 0, sizeof(*eh) + sizeof(*ah), sizeof(*am), NULL);
+
+	if (!eh || !ah || !am)
+		return -1;
+
+	if (ignore_arp(c, ah, am))
 		return 1;
 
-	ah->ar_op = htons(ARPOP_REPLY);
-	memcpy(am->tha,		am->sha,	sizeof(am->tha));
-	memcpy(am->sha,		c->our_tap_mac,	sizeof(am->sha));
+	/* ethernet header */
+	resp.eh.h_proto = htons(ETH_P_ARP);
+	memcpy(resp.eh.h_dest, eh->h_source, sizeof(resp.eh.h_dest));
+	memcpy(resp.eh.h_source, c->our_tap_mac, sizeof(resp.eh.h_source));
 
-	memcpy(swap,		am->tip,	sizeof(am->tip));
-	memcpy(am->tip,		am->sip,	sizeof(am->tip));
-	memcpy(am->sip,		swap,		sizeof(am->sip));
+	/* ARP header */
+	resp.ah.ar_op = htons(ARPOP_REPLY);
+	resp.ah.ar_hrd = ah->ar_hrd;
+	resp.ah.ar_pro = ah->ar_pro;
+	resp.ah.ar_hln = ah->ar_hln;
+	resp.ah.ar_pln = ah->ar_pln;
 
-	l2len = sizeof(*eh) + sizeof(*ah) + sizeof(*am);
-	memcpy(eh->h_dest,	eh->h_source,	sizeof(eh->h_dest));
-	memcpy(eh->h_source,	c->our_tap_mac,	sizeof(eh->h_source));
+	/* ARP message */
+	memcpy(resp.am.sha,		c->our_tap_mac,	sizeof(resp.am.sha));
+	memcpy(resp.am.sip,		am->tip,	sizeof(resp.am.sip));
+	memcpy(resp.am.tha,		am->sha,	sizeof(resp.am.tha));
+	memcpy(resp.am.tip,		am->sip,	sizeof(resp.am.tip));
 
-	tap_send_single(c, eh, l2len);
+	tap_send_single(c, &resp, sizeof(resp));
 
 	return 1;
 }
-- 
@@ -31,56 +31,82 @@
 #include "tap.h"
 
 /**
- * arp() - Check if this is a supported ARP message, reply as needed
+ * ignore_arp() - Check if this is a supported ARP message
  * @c:		Execution context
- * @p:		Packet pool, single packet with Ethernet buffer
+ * @ah:		ARP header
+ * @am:		ARP message
  *
- * Return: 1 if handled, -1 on failure
+ * Return: true if the message is supported, false otherwise.
  */
-int arp(const struct ctx *c, const struct pool *p)
+static bool ignore_arp(const struct ctx *c,
+		       const struct arphdr *ah, const struct arpmsg *am)
 {
-	unsigned char swap[4];
-	struct ethhdr *eh;
-	struct arphdr *ah;
-	struct arpmsg *am;
-	size_t l2len;
-
-	eh = packet_get(p, 0, 0,			 sizeof(*eh), NULL);
-	ah = packet_get(p, 0, sizeof(*eh),		 sizeof(*ah), NULL);
-	am = packet_get(p, 0, sizeof(*eh) + sizeof(*ah), sizeof(*am), NULL);
-
-	if (!eh || !ah || !am)
-		return -1;
-
 	if (ah->ar_hrd != htons(ARPHRD_ETHER)	||
 	    ah->ar_pro != htons(ETH_P_IP)	||
 	    ah->ar_hln != ETH_ALEN		||
 	    ah->ar_pln != 4			||
 	    ah->ar_op  != htons(ARPOP_REQUEST))
-		return 1;
+		return true;
 
 	/* Discard announcements, but not 0.0.0.0 "probes" */
 	if (memcmp(am->sip, &in4addr_any, sizeof(am->sip)) &&
 	    !memcmp(am->sip, am->tip, sizeof(am->sip)))
-		return 1;
+		return true;
 
 	/* Don't resolve the guest's assigned address, either. */
 	if (!memcmp(am->tip, &c->ip4.addr, sizeof(am->tip)))
+		return true;
+
+	return false;
+}
+
+/**
+ * arp() - Check if this is a supported ARP message, reply as needed
+ * @c:		Execution context
+ * @p:		Packet pool, single packet with Ethernet buffer
+ *
+ * Return: 1 if handled, -1 on failure
+ */
+int arp(const struct ctx *c, const struct pool *p)
+{
+	struct {
+		struct ethhdr eh;
+		struct arphdr ah;
+		struct arpmsg am;
+	} __attribute__((__packed__)) resp;
+	const struct ethhdr *eh;
+	const struct arphdr *ah;
+	const struct arpmsg *am;
+
+	eh = packet_get(p, 0, 0,			 sizeof(*eh), NULL);
+	ah = packet_get(p, 0, sizeof(*eh),		 sizeof(*ah), NULL);
+	am = packet_get(p, 0, sizeof(*eh) + sizeof(*ah), sizeof(*am), NULL);
+
+	if (!eh || !ah || !am)
+		return -1;
+
+	if (ignore_arp(c, ah, am))
 		return 1;
 
-	ah->ar_op = htons(ARPOP_REPLY);
-	memcpy(am->tha,		am->sha,	sizeof(am->tha));
-	memcpy(am->sha,		c->our_tap_mac,	sizeof(am->sha));
+	/* ethernet header */
+	resp.eh.h_proto = htons(ETH_P_ARP);
+	memcpy(resp.eh.h_dest, eh->h_source, sizeof(resp.eh.h_dest));
+	memcpy(resp.eh.h_source, c->our_tap_mac, sizeof(resp.eh.h_source));
 
-	memcpy(swap,		am->tip,	sizeof(am->tip));
-	memcpy(am->tip,		am->sip,	sizeof(am->tip));
-	memcpy(am->sip,		swap,		sizeof(am->sip));
+	/* ARP header */
+	resp.ah.ar_op = htons(ARPOP_REPLY);
+	resp.ah.ar_hrd = ah->ar_hrd;
+	resp.ah.ar_pro = ah->ar_pro;
+	resp.ah.ar_hln = ah->ar_hln;
+	resp.ah.ar_pln = ah->ar_pln;
 
-	l2len = sizeof(*eh) + sizeof(*ah) + sizeof(*am);
-	memcpy(eh->h_dest,	eh->h_source,	sizeof(eh->h_dest));
-	memcpy(eh->h_source,	c->our_tap_mac,	sizeof(eh->h_source));
+	/* ARP message */
+	memcpy(resp.am.sha,		c->our_tap_mac,	sizeof(resp.am.sha));
+	memcpy(resp.am.sip,		am->tip,	sizeof(resp.am.sip));
+	memcpy(resp.am.tha,		am->sha,	sizeof(resp.am.tha));
+	memcpy(resp.am.tip,		am->sip,	sizeof(resp.am.tip));
 
-	tap_send_single(c, eh, l2len);
+	tap_send_single(c, &resp, sizeof(resp));
 
 	return 1;
 }
-- 
2.49.0


^ permalink raw reply related	[flat|nested] 34+ messages in thread

* [PATCH 02/18] iov: Update IOV_REMOVE_HEADER() and IOV_PEEK_HEADER()
  2025-04-02 17:23 [PATCH 00/18] Introduce discontiguous frames management Laurent Vivier
  2025-04-02 17:23 ` [PATCH 01/18] arp: Don't mix incoming and outgoing buffers Laurent Vivier
@ 2025-04-02 17:23 ` Laurent Vivier
  2025-04-03  2:36   ` David Gibson
  2025-04-02 17:23 ` [PATCH 03/18] tap: Use iov_tail with tap_add_packet() Laurent Vivier
                   ` (15 subsequent siblings)
  17 siblings, 1 reply; 34+ messages in thread
From: Laurent Vivier @ 2025-04-02 17:23 UTC (permalink / raw)
  To: passt-dev; +Cc: Laurent Vivier

Provide a temporary variable of the wanted type to store
the header if the memory in the iovec array is not contiguous.

Also introduce iov_tail_ptr(), iov_drop() and iov_copy()
for later use.

Signed-off-by: Laurent Vivier <lvivier@redhat.com>
---
 iov.c     | 133 ++++++++++++++++++++++++++++++++++++++++++++++++++----
 iov.h     |  53 ++++++++++++++++------
 tcp_buf.c |   2 +-
 3 files changed, 163 insertions(+), 25 deletions(-)

diff --git a/iov.c b/iov.c
index 8c63b7ea6e31..d96fc2ab594b 100644
--- a/iov.c
+++ b/iov.c
@@ -108,7 +108,7 @@ size_t iov_from_buf(const struct iovec *iov, size_t iov_cnt,
  *
  * Returns:    The number of bytes successfully copied.
  */
-/* cppcheck-suppress unusedFunction */
+/* cppcheck-suppress [staticFunction,unmatchedSuppression] */
 size_t iov_to_buf(const struct iovec *iov, size_t iov_cnt,
 		  size_t offset, void *buf, size_t bytes)
 {
@@ -126,6 +126,7 @@ size_t iov_to_buf(const struct iovec *iov, size_t iov_cnt,
 	/* copying data */
 	for (copied = 0; copied < bytes && i < iov_cnt; i++) {
 		size_t len = MIN(iov[i].iov_len - offset, bytes - copied);
+		/* NOLINTNEXTLINE(clang-analyzer-core.NonNullParamChecker) */
 		memcpy((char *)buf + copied, (char *)iov[i].iov_base + offset,
 		       len);
 		copied += len;
@@ -156,6 +157,45 @@ size_t iov_size(const struct iovec *iov, size_t iov_cnt)
 	return len;
 }
 
+/**
+ * iov_copy - Copy data from one scatter/gather I/O vector (struct iovec) to
+ *	      another.
+ *
+ * @dst_iov:     Pointer to the destination array of struct iovec describing
+ *		 the scatter/gather I/O vector to copy to.
+ * @dst_iov_cnt: Number of elements in the destination iov array.
+ * @iov:	 Pointer to the source array of struct iovec describing
+ *		 the scatter/gather I/O vector to copy from.
+ * @iov_cnt:     Number of elements in the source iov array.
+ * @offset:      Offset within the source iov from where copying should start.
+ * @bytes:	 Total number of bytes to copy from iov to dst_iov.
+ *
+ * Returns:      The number of elements successfully copied to the destination
+ *		 iov array.
+ */
+/* cppcheck-suppress unusedFunction */
+unsigned iov_copy(struct iovec *dst_iov, size_t dst_iov_cnt,
+		  const struct iovec *iov, size_t iov_cnt,
+		  size_t offset, size_t bytes)
+{
+	unsigned int i, j;
+
+	i = iov_skip_bytes(iov, iov_cnt, offset, &offset);
+
+	/* copying data */
+	for (j = 0; i < iov_cnt && j < dst_iov_cnt && bytes; i++) {
+		size_t len = MIN(bytes, iov[i].iov_len - offset);
+
+		dst_iov[j].iov_base = (char *)iov[i].iov_base + offset;
+		dst_iov[j].iov_len = len;
+		j++;
+		bytes -= len;
+		offset = 0;
+	}
+
+	return j;
+}
+
 /**
  * iov_tail_prune() - Remove any unneeded buffers from an IOV tail
  * @tail:	IO vector tail (modified)
@@ -192,7 +232,50 @@ size_t iov_tail_size(struct iov_tail *tail)
 }
 
 /**
- * iov_peek_header_() - Get pointer to a header from an IOV tail
+ * iov_drop() - update head of the tail
+ * @tail:	IO vector tail
+ * @len:	length to move the head of the tail
+ *
+ * Returns:	true if the tail still contains any bytes, otherwise false
+ */
+/* cppcheck-suppress unusedFunction */
+bool iov_drop(struct iov_tail *tail, size_t len)
+{
+	tail->off = tail->off + len;
+
+	return iov_tail_prune(tail);
+}
+
+/**
+ * iov_tail_ptr() - get a pointer to the head of the tail
+ * @tail:	IO vector tail
+ * @off:	Byte offset in the tail to skip
+ * @len:	Length of the data to get, in bytes
+ *
+ * Returns:	pointer to the data in the tail, NULL if the
+ *		tail doesn't contains @len bytes.
+ */
+/* cppcheck-suppress unusedFunction */
+void *iov_tail_ptr(struct iov_tail *tail, size_t off, size_t len)
+{
+	const struct iovec *iov;
+	size_t cnt, i;
+
+	i = iov_skip_bytes(tail->iov, tail->cnt, tail->off + off, &off);
+	if (i == tail->cnt)
+		return NULL;
+
+	iov = &tail->iov[i];
+	cnt = tail->cnt - i;
+
+	if (iov_size(iov, cnt) < off + len)
+		return NULL;
+
+	return (char *)iov[0].iov_base + off;
+}
+
+/**
+ * iov_check_header - Check if a header can be accessed
  * @tail:	IOV tail to get header from
  * @len:	Length of header to get, in bytes
  * @align:	Required alignment of header, in bytes
@@ -203,8 +286,7 @@ size_t iov_tail_size(struct iov_tail *tail)
  *	    overruns the IO vector, is not contiguous or doesn't have the
  *	    requested alignment.
  */
-/* cppcheck-suppress [staticFunction,unmatchedSuppression] */
-void *iov_peek_header_(struct iov_tail *tail, size_t len, size_t align)
+static void *iov_check_header(struct iov_tail *tail, size_t len, size_t align)
 {
 	char *p;
 
@@ -225,7 +307,36 @@ void *iov_peek_header_(struct iov_tail *tail, size_t len, size_t align)
 }
 
 /**
- * iov_remove_header_() - Remove a header from an IOV tail
+ * iov_peek_header_ - Get pointer to a header from an IOV tail
+ * @tail:	IOV tail to get header from
+ * @len:	Length of header to get, in bytes
+ * @align:	Required alignment of header, in bytes
+ *
+ * @tail may be pruned, but will represent the same bytes as before.
+ *
+ * Returns: Pointer to the first @len logical bytes of the tail, or to
+ *          a copy if that overruns the IO vector, is not contiguous or
+ *          doesn't have the requested alignment. NULL if that overruns the
+ *          IO vector.
+ */
+/* cppcheck-suppress [staticFunction,unmatchedSuppression] */
+void *iov_peek_header_(struct iov_tail *tail, void *v, size_t len, size_t align)
+{
+	char *p = iov_check_header(tail, len, align);
+	size_t l;
+
+	if (p)
+		return p;
+
+	l = iov_to_buf(tail->iov, tail->cnt,  tail->off, v, len);
+	if (l != len)
+		return NULL;
+
+	return v;
+}
+
+/**
+ * iov_remove_header_ - Remove a header from an IOV tail
  * @tail:	IOV tail to remove header from (modified)
  * @len:	Length of header to remove, in bytes
  * @align:	Required alignment of header, in bytes
@@ -233,17 +344,19 @@ void *iov_peek_header_(struct iov_tail *tail, size_t len, size_t align)
  * On success, @tail is updated so that it longer includes the bytes of the
  * returned header.
  *
- * Returns: Pointer to the first @len logical bytes of the tail, NULL if that
- *	    overruns the IO vector, is not contiguous or doesn't have the
- *	    requested alignment.
+ * Returns: Pointer to the first @len logical bytes of the tail, or to
+ *          a copy if that overruns the IO vector, is not contiguous or
+ *          doesn't have the requested alignment. NULL if that overruns the
+ *          IO vector.
  */
-void *iov_remove_header_(struct iov_tail *tail, size_t len, size_t align)
+void *iov_remove_header_(struct iov_tail *tail, void *v, size_t len, size_t align)
 {
-	char *p = iov_peek_header_(tail, len, align);
+	char *p = iov_peek_header_(tail, v, len, align);
 
 	if (!p)
 		return NULL;
 
 	tail->off = tail->off + len;
+
 	return p;
 }
diff --git a/iov.h b/iov.h
index 9855bf0c0c32..f9b5fdf0803e 100644
--- a/iov.h
+++ b/iov.h
@@ -28,6 +28,9 @@ size_t iov_from_buf(const struct iovec *iov, size_t iov_cnt,
 size_t iov_to_buf(const struct iovec *iov, size_t iov_cnt,
                   size_t offset, void *buf, size_t bytes);
 size_t iov_size(const struct iovec *iov, size_t iov_cnt);
+unsigned iov_copy(struct iovec *dst_iov, size_t dst_iov_cnt,
+                  const struct iovec *iov, size_t iov_cnt,
+                  size_t offset, size_t bytes);
 
 /*
  * DOC: Theory of Operation, struct iov_tail
@@ -70,38 +73,60 @@ struct iov_tail {
 #define IOV_TAIL(iov_, cnt_, off_) \
 	(struct iov_tail){ .iov = (iov_), .cnt = (cnt_), .off = (off_) }
 
+/**
+ * IOV_TAIL_FROM_BUF() - Create a new IOV tail from a buffer
+ * @buf_:	Buffer address to use in the iovec
+ * @len_:	Buffer size
+ * @off_:	Byte offset in the buffer where the tail begins
+ */
+#define IOV_TAIL_FROM_BUF(buf_, len_, off_) \
+	IOV_TAIL((&(const struct iovec){ .iov_base = (buf_), .iov_len = (len_) }), 1, (off_))
+
 bool iov_tail_prune(struct iov_tail *tail);
 size_t iov_tail_size(struct iov_tail *tail);
-void *iov_peek_header_(struct iov_tail *tail, size_t len, size_t align);
-void *iov_remove_header_(struct iov_tail *tail, size_t len, size_t align);
+bool iov_drop(struct iov_tail *tail, size_t len);
+void *iov_peek_header_(struct iov_tail *tail, void *v, size_t len, size_t align);
+void *iov_remove_header_(struct iov_tail *tail, void *v, size_t len, size_t align);
+void *iov_tail_ptr(struct iov_tail *tail, size_t off, size_t len);
 
 /**
  * IOV_PEEK_HEADER() - Get typed pointer to a header from an IOV tail
  * @tail_:	IOV tail to get header from
- * @type_:	Data type of the header
+ * @var_:	Temporary buffer of the type of the header to use if
+ *		the memory in the iovec array is not contiguous.
  *
  * @tail_ may be pruned, but will represent the same bytes as before.
  *
- * Returns: Pointer of type (@type_ *) located at the start of @tail_, NULL if
- *          we can't get a contiguous and aligned pointer.
+ * Returns: Pointer of type (@type_ *) located at the start of @tail_
  */
-#define IOV_PEEK_HEADER(tail_, type_)					\
-	((type_ *)(iov_peek_header_((tail_),				\
-				    sizeof(type_), __alignof__(type_))))
+
+#define IOV_PEEK_HEADER(tail_, var_)					\
+	((__typeof__(var_) *)(iov_peek_header_((tail_), &(var_),	\
+				      sizeof(var_), __alignof__(var_))))
 
 /**
  * IOV_REMOVE_HEADER() - Remove and return typed header from an IOV tail
  * @tail_:	IOV tail to remove header from (modified)
- * @type_:	Data type of the header to remove
+ * @var_:	Temporary buffer of the type of the header to use if
+ *		the memory in the iovec array is not contiguous.
  *
  * On success, @tail_ is updated so that it longer includes the bytes of the
  * returned header.
  *
- * Returns: Pointer of type (@type_ *) located at the old start of @tail_, NULL
- *          if we can't get a contiguous and aligned pointer.
+ * Returns: Pointer of type (@type_ *) located at the old start of @tail_
+ */
+
+#define IOV_REMOVE_HEADER(tail_, var_)					\
+	((__typeof__(var_) *)(iov_remove_header_((tail_), &(var_),	\
+				    sizeof(var_), __alignof__(var_))))
+
+/** IOV_DROP_HEADER() -- Remove a typed header from an IOV tail
+ * @tail_:	IOV tail to remove header from (modified)
+ * @type_:	Data type of the header to remove
+ *
+ * Returns:	true if the tail still contains any bytes, otherwise false
  */
-#define IOV_REMOVE_HEADER(tail_, type_)					\
-	((type_ *)(iov_remove_header_((tail_),				\
-				      sizeof(type_), __alignof__(type_))))
+#define IOV_DROP_HEADER(tail_, type_)					\
+	iov_drop((tail_), sizeof(type_))
 
 #endif /* IOVEC_H */
diff --git a/tcp_buf.c b/tcp_buf.c
index 05305636b503..4bcc1acb245a 100644
--- a/tcp_buf.c
+++ b/tcp_buf.c
@@ -160,7 +160,7 @@ static void tcp_l2_buf_fill_headers(const struct tcp_tap_conn *conn,
 				    uint32_t seq, bool no_tcp_csum)
 {
 	struct iov_tail tail = IOV_TAIL(&iov[TCP_IOV_PAYLOAD], 1, 0);
-	struct tcphdr *th = IOV_REMOVE_HEADER(&tail, struct tcphdr);
+	struct tcphdr thc, *th = IOV_REMOVE_HEADER(&tail, thc);
 	struct tap_hdr *taph = iov[TCP_IOV_TAP].iov_base;
 	const struct flowside *tapside = TAPFLOW(conn);
 	const struct in_addr *a4 = inany_v4(&tapside->oaddr);
-- 
@@ -160,7 +160,7 @@ static void tcp_l2_buf_fill_headers(const struct tcp_tap_conn *conn,
 				    uint32_t seq, bool no_tcp_csum)
 {
 	struct iov_tail tail = IOV_TAIL(&iov[TCP_IOV_PAYLOAD], 1, 0);
-	struct tcphdr *th = IOV_REMOVE_HEADER(&tail, struct tcphdr);
+	struct tcphdr thc, *th = IOV_REMOVE_HEADER(&tail, thc);
 	struct tap_hdr *taph = iov[TCP_IOV_TAP].iov_base;
 	const struct flowside *tapside = TAPFLOW(conn);
 	const struct in_addr *a4 = inany_v4(&tapside->oaddr);
-- 
2.49.0


^ permalink raw reply related	[flat|nested] 34+ messages in thread

* [PATCH 03/18] tap: Use iov_tail with tap_add_packet()
  2025-04-02 17:23 [PATCH 00/18] Introduce discontiguous frames management Laurent Vivier
  2025-04-02 17:23 ` [PATCH 01/18] arp: Don't mix incoming and outgoing buffers Laurent Vivier
  2025-04-02 17:23 ` [PATCH 02/18] iov: Update IOV_REMOVE_HEADER() and IOV_PEEK_HEADER() Laurent Vivier
@ 2025-04-02 17:23 ` Laurent Vivier
  2025-04-03  4:50   ` David Gibson
  2025-04-02 17:23 ` [PATCH 04/18] packet: Use iov_tail with packet_add() Laurent Vivier
                   ` (14 subsequent siblings)
  17 siblings, 1 reply; 34+ messages in thread
From: Laurent Vivier @ 2025-04-02 17:23 UTC (permalink / raw)
  To: passt-dev; +Cc: Laurent Vivier

Use IOV_PEEK_HEADER() to get the ethernet header from the iovec.

Move the workaround about multiple iovec array from vu_handle_tx() to
tap_add_packet(). Removing the offset out of the iovec array should
reduce the iovec count to 1.

Signed-off-by: Laurent Vivier <lvivier@redhat.com>
---
 iov.c       |  1 -
 pcap.c      |  1 +
 tap.c       | 30 +++++++++++++++++++++---------
 tap.h       |  2 +-
 vu_common.c | 25 +++++--------------------
 5 files changed, 28 insertions(+), 31 deletions(-)

diff --git a/iov.c b/iov.c
index d96fc2ab594b..508fb6da91fb 100644
--- a/iov.c
+++ b/iov.c
@@ -238,7 +238,6 @@ size_t iov_tail_size(struct iov_tail *tail)
  *
  * Returns:	true if the tail still contains any bytes, otherwise false
  */
-/* cppcheck-suppress unusedFunction */
 bool iov_drop(struct iov_tail *tail, size_t len)
 {
 	tail->off = tail->off + len;
diff --git a/pcap.c b/pcap.c
index e95aa6fe29a6..404043a27e22 100644
--- a/pcap.c
+++ b/pcap.c
@@ -76,6 +76,7 @@ static void pcap_frame(const struct iovec *iov, size_t iovcnt,
  * @pkt:	Pointer to data buffer, including L2 headers
  * @l2len:	L2 frame length
  */
+/* cppcheck-suppress unusedFunction */
 void pcap(const char *pkt, size_t l2len)
 {
 	struct iovec iov = { (char *)pkt, l2len };
diff --git a/tap.c b/tap.c
index 182a1151f139..ab3effe80f89 100644
--- a/tap.c
+++ b/tap.c
@@ -1040,29 +1040,36 @@ void tap_handler(struct ctx *c, const struct timespec *now)
 /**
  * tap_add_packet() - Queue/capture packet, update notion of guest MAC address
  * @c:		Execution context
- * @l2len:	Total L2 packet length
- * @p:		Packet buffer
+ * @data:	Packet to add to the pool
  */
-void tap_add_packet(struct ctx *c, ssize_t l2len, char *p)
+void tap_add_packet(struct ctx *c, struct iov_tail *data)
 {
 	const struct ethhdr *eh;
+	struct ethhdr ehc;
 
-	pcap(p, l2len);
+	pcap_iov(data->iov, data->cnt, data->off);
 
-	eh = (struct ethhdr *)p;
+	eh = IOV_PEEK_HEADER(data, ehc);
+	if (!eh)
+		return;
 
 	if (memcmp(c->guest_mac, eh->h_source, ETH_ALEN)) {
 		memcpy(c->guest_mac, eh->h_source, ETH_ALEN);
 		proto_update_l2_buf(c->guest_mac, NULL);
 	}
 
+	iov_tail_prune(data);
+	ASSERT(data->cnt == 1); /* packet_add() doesn't support iovec */
+
 	switch (ntohs(eh->h_proto)) {
 	case ETH_P_ARP:
 	case ETH_P_IP:
-		packet_add(pool_tap4, l2len, p);
+		packet_add(pool_tap4, data->iov[0].iov_len - data->off,
+			   (char *)data->iov[0].iov_base + data->off);
 		break;
 	case ETH_P_IPV6:
-		packet_add(pool_tap6, l2len, p);
+		packet_add(pool_tap6, data->iov[0].iov_len - data->off,
+			   (char *)data->iov[0].iov_base + data->off);
 		break;
 	default:
 		break;
@@ -1128,6 +1135,7 @@ static void tap_passt_input(struct ctx *c, const struct timespec *now)
 
 	while (n >= (ssize_t)sizeof(uint32_t)) {
 		uint32_t l2len = ntohl_unaligned(p);
+		struct iov_tail data;
 
 		if (l2len < sizeof(struct ethhdr) || l2len > L2_MAX_LEN_PASST) {
 			err("Bad frame size from guest, resetting connection");
@@ -1142,7 +1150,8 @@ static void tap_passt_input(struct ctx *c, const struct timespec *now)
 		p += sizeof(uint32_t);
 		n -= sizeof(uint32_t);
 
-		tap_add_packet(c, l2len, p);
+		data = IOV_TAIL_FROM_BUF(p, l2len, 0);
+		tap_add_packet(c, &data);
 
 		p += l2len;
 		n -= l2len;
@@ -1186,6 +1195,8 @@ static void tap_pasta_input(struct ctx *c, const struct timespec *now)
 	for (n = 0;
 	     n <= (ssize_t)(sizeof(pkt_buf) - L2_MAX_LEN_PASTA);
 	     n += len) {
+		struct iov_tail data;
+
 		len = read(c->fd_tap, pkt_buf + n, L2_MAX_LEN_PASTA);
 
 		if (len == 0) {
@@ -1207,7 +1218,8 @@ static void tap_pasta_input(struct ctx *c, const struct timespec *now)
 		    len > (ssize_t)L2_MAX_LEN_PASTA)
 			continue;
 
-		tap_add_packet(c, len, pkt_buf + n);
+		data = IOV_TAIL_FROM_BUF(pkt_buf + n, len, 0);
+		tap_add_packet(c, &data);
 	}
 
 	tap_handler(c, now);
diff --git a/tap.h b/tap.h
index dd39fd896f4a..5034acd8ac46 100644
--- a/tap.h
+++ b/tap.h
@@ -119,6 +119,6 @@ void tap_sock_update_pool(void *base, size_t size);
 void tap_backend_init(struct ctx *c);
 void tap_flush_pools(void);
 void tap_handler(struct ctx *c, const struct timespec *now);
-void tap_add_packet(struct ctx *c, ssize_t l2len, char *p);
+void tap_add_packet(struct ctx *c, struct iov_tail *data);
 
 #endif /* TAP_H */
diff --git a/vu_common.c b/vu_common.c
index 686a09b28c8e..e446fc4f2054 100644
--- a/vu_common.c
+++ b/vu_common.c
@@ -159,7 +159,6 @@ static void vu_handle_tx(struct vu_dev *vdev, int index,
 	struct vu_virtq_element elem[VIRTQUEUE_MAX_SIZE];
 	struct iovec out_sg[VIRTQUEUE_MAX_SIZE];
 	struct vu_virtq *vq = &vdev->vq[index];
-	int hdrlen = sizeof(struct virtio_net_hdr_mrg_rxbuf);
 	int out_sg_count;
 	int count;
 
@@ -172,6 +171,7 @@ static void vu_handle_tx(struct vu_dev *vdev, int index,
 	while (count < VIRTQUEUE_MAX_SIZE &&
 	       out_sg_count + VU_MAX_TX_BUFFER_NB <= VIRTQUEUE_MAX_SIZE) {
 		int ret;
+		struct iov_tail data;
 
 		elem[count].out_num = VU_MAX_TX_BUFFER_NB;
 		elem[count].out_sg = &out_sg[out_sg_count];
@@ -187,25 +187,10 @@ static void vu_handle_tx(struct vu_dev *vdev, int index,
 			warn("virtio-net transmit queue contains no out buffers");
 			break;
 		}
-		if (elem[count].out_num == 1) {
-			tap_add_packet(vdev->context,
-				       elem[count].out_sg[0].iov_len - hdrlen,
-				       (char *)elem[count].out_sg[0].iov_base +
-				        hdrlen);
-		} else {
-			/* vnet header can be in a separate iovec */
-			if (elem[count].out_num != 2) {
-				debug("virtio-net transmit queue contains more than one buffer ([%d]: %u)",
-				      count, elem[count].out_num);
-			} else if (elem[count].out_sg[0].iov_len != (size_t)hdrlen) {
-				debug("virtio-net transmit queue entry not aligned on hdrlen ([%d]: %d != %zu)",
-				      count, hdrlen, elem[count].out_sg[0].iov_len);
-			} else {
-				tap_add_packet(vdev->context,
-					       elem[count].out_sg[1].iov_len,
-					       (char *)elem[count].out_sg[1].iov_base);
-			}
-		}
+
+		data = IOV_TAIL(elem[count].out_sg, elem[count].out_num, 0);
+		if (IOV_DROP_HEADER(&data, struct virtio_net_hdr_mrg_rxbuf))
+			tap_add_packet(vdev->context, &data);
 
 		count++;
 	}
-- 
@@ -159,7 +159,6 @@ static void vu_handle_tx(struct vu_dev *vdev, int index,
 	struct vu_virtq_element elem[VIRTQUEUE_MAX_SIZE];
 	struct iovec out_sg[VIRTQUEUE_MAX_SIZE];
 	struct vu_virtq *vq = &vdev->vq[index];
-	int hdrlen = sizeof(struct virtio_net_hdr_mrg_rxbuf);
 	int out_sg_count;
 	int count;
 
@@ -172,6 +171,7 @@ static void vu_handle_tx(struct vu_dev *vdev, int index,
 	while (count < VIRTQUEUE_MAX_SIZE &&
 	       out_sg_count + VU_MAX_TX_BUFFER_NB <= VIRTQUEUE_MAX_SIZE) {
 		int ret;
+		struct iov_tail data;
 
 		elem[count].out_num = VU_MAX_TX_BUFFER_NB;
 		elem[count].out_sg = &out_sg[out_sg_count];
@@ -187,25 +187,10 @@ static void vu_handle_tx(struct vu_dev *vdev, int index,
 			warn("virtio-net transmit queue contains no out buffers");
 			break;
 		}
-		if (elem[count].out_num == 1) {
-			tap_add_packet(vdev->context,
-				       elem[count].out_sg[0].iov_len - hdrlen,
-				       (char *)elem[count].out_sg[0].iov_base +
-				        hdrlen);
-		} else {
-			/* vnet header can be in a separate iovec */
-			if (elem[count].out_num != 2) {
-				debug("virtio-net transmit queue contains more than one buffer ([%d]: %u)",
-				      count, elem[count].out_num);
-			} else if (elem[count].out_sg[0].iov_len != (size_t)hdrlen) {
-				debug("virtio-net transmit queue entry not aligned on hdrlen ([%d]: %d != %zu)",
-				      count, hdrlen, elem[count].out_sg[0].iov_len);
-			} else {
-				tap_add_packet(vdev->context,
-					       elem[count].out_sg[1].iov_len,
-					       (char *)elem[count].out_sg[1].iov_base);
-			}
-		}
+
+		data = IOV_TAIL(elem[count].out_sg, elem[count].out_num, 0);
+		if (IOV_DROP_HEADER(&data, struct virtio_net_hdr_mrg_rxbuf))
+			tap_add_packet(vdev->context, &data);
 
 		count++;
 	}
-- 
2.49.0


^ permalink raw reply related	[flat|nested] 34+ messages in thread

* [PATCH 04/18] packet: Use iov_tail with packet_add()
  2025-04-02 17:23 [PATCH 00/18] Introduce discontiguous frames management Laurent Vivier
                   ` (2 preceding siblings ...)
  2025-04-02 17:23 ` [PATCH 03/18] tap: Use iov_tail with tap_add_packet() Laurent Vivier
@ 2025-04-02 17:23 ` Laurent Vivier
  2025-04-03  4:54   ` David Gibson
  2025-04-02 17:23 ` [PATCH 05/18] packet: Add packet_base() Laurent Vivier
                   ` (13 subsequent siblings)
  17 siblings, 1 reply; 34+ messages in thread
From: Laurent Vivier @ 2025-04-02 17:23 UTC (permalink / raw)
  To: passt-dev; +Cc: Laurent Vivier

Signed-off-by: Laurent Vivier <lvivier@redhat.com>
---
 iov.h    |  1 +
 packet.c | 15 ++++++++++++---
 packet.h |  8 +++++---
 tap.c    | 32 ++++++++++++++++++--------------
 4 files changed, 36 insertions(+), 20 deletions(-)

diff --git a/iov.h b/iov.h
index f9b5fdf0803e..99442d031549 100644
--- a/iov.h
+++ b/iov.h
@@ -17,6 +17,7 @@
 
 #include <unistd.h>
 #include <string.h>
+#include <stdbool.h>
 
 #define IOV_OF_LVALUE(lval) \
 	(struct iovec){ .iov_base = &(lval), .iov_len = sizeof(lval) }
diff --git a/packet.c b/packet.c
index bcac03750dc0..8b11e0850ff6 100644
--- a/packet.c
+++ b/packet.c
@@ -64,15 +64,16 @@ static int packet_check_range(const struct pool *p, const char *ptr, size_t len,
 /**
  * packet_add_do() - Add data as packet descriptor to given pool
  * @p:		Existing pool
- * @len:	Length of new descriptor
- * @start:	Start of data
+ * @data:	Data to add
  * @func:	For tracing: name of calling function, NULL means no trace()
  * @line:	For tracing: caller line of function call
  */
-void packet_add_do(struct pool *p, size_t len, const char *start,
+void packet_add_do(struct pool *p, struct iov_tail *data,
 		   const char *func, int line)
 {
 	size_t idx = p->count;
+	const char *start;
+	size_t len;
 
 	if (idx >= p->size) {
 		trace("add packet index %zu to pool with size %zu, %s:%i",
@@ -80,6 +81,14 @@ void packet_add_do(struct pool *p, size_t len, const char *start,
 		return;
 	}
 
+	if (!iov_tail_prune(data))
+		return;
+
+	ASSERT(data->cnt == 1); /* we don't support iovec */
+
+	len = data->iov[0].iov_len - data->off;
+	start = (char *)data->iov[0].iov_base + data->off;
+
 	if (packet_check_range(p, start, len, func, line))
 		return;
 
diff --git a/packet.h b/packet.h
index d099f026631b..f386295da203 100644
--- a/packet.h
+++ b/packet.h
@@ -6,6 +6,8 @@
 #ifndef PACKET_H
 #define PACKET_H
 
+#include "iov.h"
+
 /* Maximum size of a single packet stored in pool, including headers */
 #define PACKET_MAX_LEN	UINT16_MAX
 
@@ -28,15 +30,15 @@ struct pool {
 };
 
 int vu_packet_check_range(void *buf, const char *ptr, size_t len);
-void packet_add_do(struct pool *p, size_t len, const char *start,
+void packet_add_do(struct pool *p, struct iov_tail *data,
 		   const char *func, int line);
 void *packet_get_do(const struct pool *p, const size_t idx,
 		    size_t offset, size_t len, size_t *left,
 		    const char *func, int line);
 void pool_flush(struct pool *p);
 
-#define packet_add(p, len, start)					\
-	packet_add_do(p, len, start, __func__, __LINE__)
+#define packet_add(p, data)					\
+	packet_add_do(p, data, __func__, __LINE__)
 
 #define packet_get(p, idx, offset, len, left)				\
 	packet_get_do(p, idx, offset, len, left, __func__, __LINE__)
diff --git a/tap.c b/tap.c
index ab3effe80f89..4b54807c4101 100644
--- a/tap.c
+++ b/tap.c
@@ -683,6 +683,7 @@ resume:
 		size_t l2len, l3len, hlen, l4len;
 		const struct ethhdr *eh;
 		const struct udphdr *uh;
+		struct iov_tail data;
 		struct iphdr *iph;
 		const char *l4h;
 
@@ -694,7 +695,8 @@ resume:
 		if (ntohs(eh->h_proto) == ETH_P_ARP) {
 			PACKET_POOL_P(pkt, 1, in->buf, in->buf_size);
 
-			packet_add(pkt, l2len, (char *)eh);
+			data = IOV_TAIL_FROM_BUF((void *)eh, l2len, 0);
+			packet_add(pkt, &data);
 			arp(c, pkt);
 			continue;
 		}
@@ -739,7 +741,8 @@ resume:
 
 			tap_packet_debug(iph, NULL, NULL, 0, NULL, 1);
 
-			packet_add(pkt, l4len, l4h);
+			data = IOV_TAIL_FROM_BUF((void *)l4h, l4len, 0);
+			packet_add(pkt, &data);
 			icmp_tap_handler(c, PIF_TAP, AF_INET,
 					 &iph->saddr, &iph->daddr,
 					 pkt, now);
@@ -753,7 +756,8 @@ resume:
 		if (iph->protocol == IPPROTO_UDP) {
 			PACKET_POOL_P(pkt, 1, in->buf, in->buf_size);
 
-			packet_add(pkt, l2len, (char *)eh);
+			data = IOV_TAIL_FROM_BUF((void *)eh, l2len, 0);
+			packet_add(pkt, &data);
 			if (dhcp(c, pkt))
 				continue;
 		}
@@ -802,7 +806,8 @@ resume:
 #undef L4_SET
 
 append:
-		packet_add((struct pool *)&seq->p, l4len, l4h);
+		data = IOV_TAIL_FROM_BUF((void *)l4h, l4len, 0);
+		packet_add((struct pool *)&seq->p, &data);
 	}
 
 	for (j = 0, seq = tap4_l4; j < seq_count; j++, seq++) {
@@ -858,6 +863,7 @@ resume:
 		struct in6_addr *saddr, *daddr;
 		const struct ethhdr *eh;
 		const struct udphdr *uh;
+		struct iov_tail data;
 		struct ipv6hdr *ip6h;
 		uint8_t proto;
 		char *l4h;
@@ -911,7 +917,8 @@ resume:
 			if (l4len < sizeof(struct icmp6hdr))
 				continue;
 
-			packet_add(pkt, l4len, l4h);
+			data = IOV_TAIL_FROM_BUF(l4h, l4len, 0);
+			packet_add(pkt, &data);
 
 			if (ndp(c, (struct icmp6hdr *)l4h, saddr, pkt))
 				continue;
@@ -930,7 +937,8 @@ resume:
 		if (proto == IPPROTO_UDP) {
 			PACKET_POOL_P(pkt, 1, in->buf, in->buf_size);
 
-			packet_add(pkt, l4len, l4h);
+			data = IOV_TAIL_FROM_BUF(l4h, l4len, 0);
+			packet_add(pkt, &data);
 
 			if (dhcpv6(c, pkt, saddr, daddr))
 				continue;
@@ -984,7 +992,8 @@ resume:
 #undef L4_SET
 
 append:
-		packet_add((struct pool *)&seq->p, l4len, l4h);
+		data = IOV_TAIL_FROM_BUF(l4h, l4len, 0);
+		packet_add((struct pool *)&seq->p, &data);
 	}
 
 	for (j = 0, seq = tap6_l4; j < seq_count; j++, seq++) {
@@ -1058,18 +1067,13 @@ void tap_add_packet(struct ctx *c, struct iov_tail *data)
 		proto_update_l2_buf(c->guest_mac, NULL);
 	}
 
-	iov_tail_prune(data);
-	ASSERT(data->cnt == 1); /* packet_add() doesn't support iovec */
-
 	switch (ntohs(eh->h_proto)) {
 	case ETH_P_ARP:
 	case ETH_P_IP:
-		packet_add(pool_tap4, data->iov[0].iov_len - data->off,
-			   (char *)data->iov[0].iov_base + data->off);
+		packet_add(pool_tap4, data);
 		break;
 	case ETH_P_IPV6:
-		packet_add(pool_tap6, data->iov[0].iov_len - data->off,
-			   (char *)data->iov[0].iov_base + data->off);
+		packet_add(pool_tap6, data);
 		break;
 	default:
 		break;
-- 
@@ -683,6 +683,7 @@ resume:
 		size_t l2len, l3len, hlen, l4len;
 		const struct ethhdr *eh;
 		const struct udphdr *uh;
+		struct iov_tail data;
 		struct iphdr *iph;
 		const char *l4h;
 
@@ -694,7 +695,8 @@ resume:
 		if (ntohs(eh->h_proto) == ETH_P_ARP) {
 			PACKET_POOL_P(pkt, 1, in->buf, in->buf_size);
 
-			packet_add(pkt, l2len, (char *)eh);
+			data = IOV_TAIL_FROM_BUF((void *)eh, l2len, 0);
+			packet_add(pkt, &data);
 			arp(c, pkt);
 			continue;
 		}
@@ -739,7 +741,8 @@ resume:
 
 			tap_packet_debug(iph, NULL, NULL, 0, NULL, 1);
 
-			packet_add(pkt, l4len, l4h);
+			data = IOV_TAIL_FROM_BUF((void *)l4h, l4len, 0);
+			packet_add(pkt, &data);
 			icmp_tap_handler(c, PIF_TAP, AF_INET,
 					 &iph->saddr, &iph->daddr,
 					 pkt, now);
@@ -753,7 +756,8 @@ resume:
 		if (iph->protocol == IPPROTO_UDP) {
 			PACKET_POOL_P(pkt, 1, in->buf, in->buf_size);
 
-			packet_add(pkt, l2len, (char *)eh);
+			data = IOV_TAIL_FROM_BUF((void *)eh, l2len, 0);
+			packet_add(pkt, &data);
 			if (dhcp(c, pkt))
 				continue;
 		}
@@ -802,7 +806,8 @@ resume:
 #undef L4_SET
 
 append:
-		packet_add((struct pool *)&seq->p, l4len, l4h);
+		data = IOV_TAIL_FROM_BUF((void *)l4h, l4len, 0);
+		packet_add((struct pool *)&seq->p, &data);
 	}
 
 	for (j = 0, seq = tap4_l4; j < seq_count; j++, seq++) {
@@ -858,6 +863,7 @@ resume:
 		struct in6_addr *saddr, *daddr;
 		const struct ethhdr *eh;
 		const struct udphdr *uh;
+		struct iov_tail data;
 		struct ipv6hdr *ip6h;
 		uint8_t proto;
 		char *l4h;
@@ -911,7 +917,8 @@ resume:
 			if (l4len < sizeof(struct icmp6hdr))
 				continue;
 
-			packet_add(pkt, l4len, l4h);
+			data = IOV_TAIL_FROM_BUF(l4h, l4len, 0);
+			packet_add(pkt, &data);
 
 			if (ndp(c, (struct icmp6hdr *)l4h, saddr, pkt))
 				continue;
@@ -930,7 +937,8 @@ resume:
 		if (proto == IPPROTO_UDP) {
 			PACKET_POOL_P(pkt, 1, in->buf, in->buf_size);
 
-			packet_add(pkt, l4len, l4h);
+			data = IOV_TAIL_FROM_BUF(l4h, l4len, 0);
+			packet_add(pkt, &data);
 
 			if (dhcpv6(c, pkt, saddr, daddr))
 				continue;
@@ -984,7 +992,8 @@ resume:
 #undef L4_SET
 
 append:
-		packet_add((struct pool *)&seq->p, l4len, l4h);
+		data = IOV_TAIL_FROM_BUF(l4h, l4len, 0);
+		packet_add((struct pool *)&seq->p, &data);
 	}
 
 	for (j = 0, seq = tap6_l4; j < seq_count; j++, seq++) {
@@ -1058,18 +1067,13 @@ void tap_add_packet(struct ctx *c, struct iov_tail *data)
 		proto_update_l2_buf(c->guest_mac, NULL);
 	}
 
-	iov_tail_prune(data);
-	ASSERT(data->cnt == 1); /* packet_add() doesn't support iovec */
-
 	switch (ntohs(eh->h_proto)) {
 	case ETH_P_ARP:
 	case ETH_P_IP:
-		packet_add(pool_tap4, data->iov[0].iov_len - data->off,
-			   (char *)data->iov[0].iov_base + data->off);
+		packet_add(pool_tap4, data);
 		break;
 	case ETH_P_IPV6:
-		packet_add(pool_tap6, data->iov[0].iov_len - data->off,
-			   (char *)data->iov[0].iov_base + data->off);
+		packet_add(pool_tap6, data);
 		break;
 	default:
 		break;
-- 
2.49.0


^ permalink raw reply related	[flat|nested] 34+ messages in thread

* [PATCH 05/18] packet: Add packet_base()
  2025-04-02 17:23 [PATCH 00/18] Introduce discontiguous frames management Laurent Vivier
                   ` (3 preceding siblings ...)
  2025-04-02 17:23 ` [PATCH 04/18] packet: Use iov_tail with packet_add() Laurent Vivier
@ 2025-04-02 17:23 ` Laurent Vivier
  2025-04-03  4:59   ` David Gibson
  2025-04-02 17:23 ` [PATCH 06/18] arp: Convert to iov_tail Laurent Vivier
                   ` (12 subsequent siblings)
  17 siblings, 1 reply; 34+ messages in thread
From: Laurent Vivier @ 2025-04-02 17:23 UTC (permalink / raw)
  To: passt-dev; +Cc: Laurent Vivier

Signed-off-by: Laurent Vivier <lvivier@redhat.com>
---
 packet.c | 37 +++++++++++++++++++++++++++++++++++++
 packet.h |  7 +++++++
 2 files changed, 44 insertions(+)

diff --git a/packet.c b/packet.c
index 8b11e0850ff6..25ede38b94cb 100644
--- a/packet.c
+++ b/packet.c
@@ -156,6 +156,43 @@ void *packet_get_do(const struct pool *p, size_t idx, size_t offset,
 	return ptr;
 }
 
+/**
+ * packet_base_do() - Get data range from packet descriptor from given pool
+ * @p:		Packet pool
+ * @idx:	Index of packet descriptor in pool
+ * @func:	For tracing: name of calling function, NULL means no trace()
+ * @line:	For tracing: caller line of function call
+ *
+ * Return: pointer to start of data range, NULL on invalid range or descriptor
+ */
+/* cppcheck-suppress unusedFunction */
+bool packet_base_do(const struct pool *p, size_t idx,
+		   struct iov_tail *data,
+		   const char *func, int line)
+{
+	size_t i;
+
+	if (idx >= p->size || idx >= p->count) {
+		if (func) {
+			trace("packet %zu from pool size: %zu, count: %zu, "
+			      "%s:%i", idx, p->size, p->count, func, line);
+		}
+		return NULL;
+	}
+
+	data->cnt = 1;
+	data->off = 0;
+	data->iov = &p->pkt[idx];
+
+	for (i = 0; i < data->cnt; i++) {
+		if (packet_check_range(p, data->iov[i].iov_base,
+				       data->iov[i].iov_len, func, line))
+			return false;
+	}
+
+	return true;
+}
+
 /**
  * pool_flush() - Flush a packet pool
  * @p:		Pointer to packet pool
diff --git a/packet.h b/packet.h
index f386295da203..35058e747a43 100644
--- a/packet.h
+++ b/packet.h
@@ -35,6 +35,9 @@ void packet_add_do(struct pool *p, struct iov_tail *data,
 void *packet_get_do(const struct pool *p, const size_t idx,
 		    size_t offset, size_t len, size_t *left,
 		    const char *func, int line);
+bool packet_base_do(const struct pool *p, const size_t idx,
+		    struct iov_tail *data,
+		    const char *func, int line);
 void pool_flush(struct pool *p);
 
 #define packet_add(p, data)					\
@@ -43,9 +46,13 @@ void pool_flush(struct pool *p);
 #define packet_get(p, idx, offset, len, left)				\
 	packet_get_do(p, idx, offset, len, left, __func__, __LINE__)
 
+
 #define packet_get_try(p, idx, offset, len, left)			\
 	packet_get_do(p, idx, offset, len, left, NULL, 0)
 
+#define packet_base(p, idx, data)					\
+	packet_base_do(p, idx, data, __func__, __LINE__)
+
 #define PACKET_POOL_DECL(_name, _size, _buf)				\
 struct _name ## _t {							\
 	char *buf;							\
-- 
@@ -35,6 +35,9 @@ void packet_add_do(struct pool *p, struct iov_tail *data,
 void *packet_get_do(const struct pool *p, const size_t idx,
 		    size_t offset, size_t len, size_t *left,
 		    const char *func, int line);
+bool packet_base_do(const struct pool *p, const size_t idx,
+		    struct iov_tail *data,
+		    const char *func, int line);
 void pool_flush(struct pool *p);
 
 #define packet_add(p, data)					\
@@ -43,9 +46,13 @@ void pool_flush(struct pool *p);
 #define packet_get(p, idx, offset, len, left)				\
 	packet_get_do(p, idx, offset, len, left, __func__, __LINE__)
 
+
 #define packet_get_try(p, idx, offset, len, left)			\
 	packet_get_do(p, idx, offset, len, left, NULL, 0)
 
+#define packet_base(p, idx, data)					\
+	packet_base_do(p, idx, data, __func__, __LINE__)
+
 #define PACKET_POOL_DECL(_name, _size, _buf)				\
 struct _name ## _t {							\
 	char *buf;							\
-- 
2.49.0


^ permalink raw reply related	[flat|nested] 34+ messages in thread

* [PATCH 06/18] arp: Convert to iov_tail
  2025-04-02 17:23 [PATCH 00/18] Introduce discontiguous frames management Laurent Vivier
                   ` (4 preceding siblings ...)
  2025-04-02 17:23 ` [PATCH 05/18] packet: Add packet_base() Laurent Vivier
@ 2025-04-02 17:23 ` Laurent Vivier
  2025-04-03  5:00   ` David Gibson
  2025-04-02 17:23 ` [PATCH 07/18] ndp: " Laurent Vivier
                   ` (11 subsequent siblings)
  17 siblings, 1 reply; 34+ messages in thread
From: Laurent Vivier @ 2025-04-02 17:23 UTC (permalink / raw)
  To: passt-dev; +Cc: Laurent Vivier

Use packet_base() and extract headers using IOV_REMOVE_HEADER()
rather than packet_get().

Signed-off-by: Laurent Vivier <lvivier@redhat.com>
---
 arp.c    | 12 +++++++++---
 packet.c |  1 -
 2 files changed, 9 insertions(+), 4 deletions(-)

diff --git a/arp.c b/arp.c
index 9d68d7c3b602..cd0e15de7de0 100644
--- a/arp.c
+++ b/arp.c
@@ -77,11 +77,17 @@ int arp(const struct ctx *c, const struct pool *p)
 	const struct ethhdr *eh;
 	const struct arphdr *ah;
 	const struct arpmsg *am;
+	struct iov_tail data;
+	struct arphdr  ahc;
+	struct ethhdr ehc;
+	struct arpmsg amc;
 
-	eh = packet_get(p, 0, 0,			 sizeof(*eh), NULL);
-	ah = packet_get(p, 0, sizeof(*eh),		 sizeof(*ah), NULL);
-	am = packet_get(p, 0, sizeof(*eh) + sizeof(*ah), sizeof(*am), NULL);
+	if (!packet_base(p, 0, &data))
+		return -1;
 
+	eh = IOV_REMOVE_HEADER(&data, ehc);
+	ah = IOV_REMOVE_HEADER(&data, ahc);
+	am = IOV_REMOVE_HEADER(&data, amc);
 	if (!eh || !ah || !am)
 		return -1;
 
diff --git a/packet.c b/packet.c
index 25ede38b94cb..8066ac12502b 100644
--- a/packet.c
+++ b/packet.c
@@ -165,7 +165,6 @@ void *packet_get_do(const struct pool *p, size_t idx, size_t offset,
  *
  * Return: pointer to start of data range, NULL on invalid range or descriptor
  */
-/* cppcheck-suppress unusedFunction */
 bool packet_base_do(const struct pool *p, size_t idx,
 		   struct iov_tail *data,
 		   const char *func, int line)
-- 
@@ -165,7 +165,6 @@ void *packet_get_do(const struct pool *p, size_t idx, size_t offset,
  *
  * Return: pointer to start of data range, NULL on invalid range or descriptor
  */
-/* cppcheck-suppress unusedFunction */
 bool packet_base_do(const struct pool *p, size_t idx,
 		   struct iov_tail *data,
 		   const char *func, int line)
-- 
2.49.0


^ permalink raw reply related	[flat|nested] 34+ messages in thread

* [PATCH 07/18] ndp: Convert to iov_tail
  2025-04-02 17:23 [PATCH 00/18] Introduce discontiguous frames management Laurent Vivier
                   ` (5 preceding siblings ...)
  2025-04-02 17:23 ` [PATCH 06/18] arp: Convert to iov_tail Laurent Vivier
@ 2025-04-02 17:23 ` Laurent Vivier
  2025-04-03  5:01   ` David Gibson
  2025-04-02 17:23 ` [PATCH 08/18] icmp: " Laurent Vivier
                   ` (10 subsequent siblings)
  17 siblings, 1 reply; 34+ messages in thread
From: Laurent Vivier @ 2025-04-02 17:23 UTC (permalink / raw)
  To: passt-dev; +Cc: Laurent Vivier

Use packet_base() and extract headers using IOV_REMOVE_HEADER()
rather than packet_get().

Signed-off-by: Laurent Vivier <lvivier@redhat.com>
---
 ndp.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/ndp.c b/ndp.c
index ded2081ddd1d..64e25d5455b4 100644
--- a/ndp.c
+++ b/ndp.c
@@ -351,8 +351,13 @@ int ndp(const struct ctx *c, const struct icmp6hdr *ih,
 
 	if (ih->icmp6_type == NS) {
 		const struct ndp_ns *ns;
+		struct iov_tail data;
+		struct ndp_ns nsc;
 
-		ns = packet_get(p, 0, 0, sizeof(struct ndp_ns), NULL);
+		if (!packet_base(p, 0, &data))
+			return -1;
+
+		ns = IOV_REMOVE_HEADER(&data, nsc);
 		if (!ns)
 			return -1;
 
-- 
@@ -351,8 +351,13 @@ int ndp(const struct ctx *c, const struct icmp6hdr *ih,
 
 	if (ih->icmp6_type == NS) {
 		const struct ndp_ns *ns;
+		struct iov_tail data;
+		struct ndp_ns nsc;
 
-		ns = packet_get(p, 0, 0, sizeof(struct ndp_ns), NULL);
+		if (!packet_base(p, 0, &data))
+			return -1;
+
+		ns = IOV_REMOVE_HEADER(&data, nsc);
 		if (!ns)
 			return -1;
 
-- 
2.49.0


^ permalink raw reply related	[flat|nested] 34+ messages in thread

* [PATCH 08/18] icmp: Convert to iov_tail
  2025-04-02 17:23 [PATCH 00/18] Introduce discontiguous frames management Laurent Vivier
                   ` (6 preceding siblings ...)
  2025-04-02 17:23 ` [PATCH 07/18] ndp: " Laurent Vivier
@ 2025-04-02 17:23 ` Laurent Vivier
  2025-04-03  5:04   ` David Gibson
  2025-04-02 17:23 ` [PATCH 09/18] udp: " Laurent Vivier
                   ` (9 subsequent siblings)
  17 siblings, 1 reply; 34+ messages in thread
From: Laurent Vivier @ 2025-04-02 17:23 UTC (permalink / raw)
  To: passt-dev; +Cc: Laurent Vivier

Use packet_base() and extract headers using IOV_PEEK_HEADER()
rather than packet_get().

Signed-off-by: Laurent Vivier <lvivier@redhat.com>
---
 icmp.c | 38 +++++++++++++++++++++++---------------
 1 file changed, 23 insertions(+), 15 deletions(-)

diff --git a/icmp.c b/icmp.c
index 7e2b3423a8d1..119502c0c340 100644
--- a/icmp.c
+++ b/icmp.c
@@ -241,24 +241,25 @@ int icmp_tap_handler(const struct ctx *c, uint8_t pif, sa_family_t af,
 	struct icmp_ping_flow *pingf;
 	const struct flowside *tgt;
 	union sockaddr_inany sa;
-	size_t dlen, l4len;
+	struct iov_tail data;
+	struct msghdr msh;
 	uint16_t id, seq;
 	union flow *flow;
 	uint8_t proto;
-	socklen_t sl;
-	void *pkt;
 
 	(void)saddr;
 	ASSERT(pif == PIF_TAP);
 
+	if (!packet_base(p, 0, &data))
+		return -1;
+
 	if (af == AF_INET) {
 		const struct icmphdr *ih;
+		struct icmphdr ihc;
 
-		if (!(pkt = packet_get(p, 0, 0, sizeof(*ih), &dlen)))
-			return 1;
-
-		ih =  (struct icmphdr *)pkt;
-		l4len = dlen + sizeof(*ih);
+		ih = IOV_PEEK_HEADER(&data, ihc);
+		if (!ih)
+			return -1;
 
 		if (ih->type != ICMP_ECHO)
 			return 1;
@@ -268,12 +269,11 @@ int icmp_tap_handler(const struct ctx *c, uint8_t pif, sa_family_t af,
 		seq = ntohs(ih->un.echo.sequence);
 	} else if (af == AF_INET6) {
 		const struct icmp6hdr *ih;
+		struct icmp6hdr ihc;
 
-		if (!(pkt = packet_get(p, 0, 0, sizeof(*ih), &dlen)))
-			return 1;
-
-		ih = (struct icmp6hdr *)pkt;
-		l4len = dlen + sizeof(*ih);
+		ih = IOV_PEEK_HEADER(&data, ihc);
+		if (!ih)
+			return -1;
 
 		if (ih->icmp6_type != ICMPV6_ECHO_REQUEST)
 			return 1;
@@ -298,8 +298,16 @@ int icmp_tap_handler(const struct ctx *c, uint8_t pif, sa_family_t af,
 	ASSERT(flow_proto[pingf->f.type] == proto);
 	pingf->ts = now->tv_sec;
 
-	pif_sockaddr(c, &sa, &sl, PIF_HOST, &tgt->eaddr, 0);
-	if (sendto(pingf->sock, pkt, l4len, MSG_NOSIGNAL, &sa.sa, sl) < 0) {
+	ASSERT(data.off == 0);
+	msh.msg_name = &sa;
+	msh.msg_iov = (struct iovec *)data.iov;
+	msh.msg_iovlen = data.cnt;
+	msh.msg_control = NULL;
+	msh.msg_controllen = 0;
+	msh.msg_flags = 0;
+
+	pif_sockaddr(c, &sa, &msh.msg_namelen, PIF_HOST, &tgt->eaddr, 0);
+	if (sendmsg(pingf->sock, &msh, MSG_NOSIGNAL) < 0) {
 		flow_dbg_perror(pingf, "failed to relay request to socket");
 	} else {
 		flow_dbg(pingf,
-- 
@@ -241,24 +241,25 @@ int icmp_tap_handler(const struct ctx *c, uint8_t pif, sa_family_t af,
 	struct icmp_ping_flow *pingf;
 	const struct flowside *tgt;
 	union sockaddr_inany sa;
-	size_t dlen, l4len;
+	struct iov_tail data;
+	struct msghdr msh;
 	uint16_t id, seq;
 	union flow *flow;
 	uint8_t proto;
-	socklen_t sl;
-	void *pkt;
 
 	(void)saddr;
 	ASSERT(pif == PIF_TAP);
 
+	if (!packet_base(p, 0, &data))
+		return -1;
+
 	if (af == AF_INET) {
 		const struct icmphdr *ih;
+		struct icmphdr ihc;
 
-		if (!(pkt = packet_get(p, 0, 0, sizeof(*ih), &dlen)))
-			return 1;
-
-		ih =  (struct icmphdr *)pkt;
-		l4len = dlen + sizeof(*ih);
+		ih = IOV_PEEK_HEADER(&data, ihc);
+		if (!ih)
+			return -1;
 
 		if (ih->type != ICMP_ECHO)
 			return 1;
@@ -268,12 +269,11 @@ int icmp_tap_handler(const struct ctx *c, uint8_t pif, sa_family_t af,
 		seq = ntohs(ih->un.echo.sequence);
 	} else if (af == AF_INET6) {
 		const struct icmp6hdr *ih;
+		struct icmp6hdr ihc;
 
-		if (!(pkt = packet_get(p, 0, 0, sizeof(*ih), &dlen)))
-			return 1;
-
-		ih = (struct icmp6hdr *)pkt;
-		l4len = dlen + sizeof(*ih);
+		ih = IOV_PEEK_HEADER(&data, ihc);
+		if (!ih)
+			return -1;
 
 		if (ih->icmp6_type != ICMPV6_ECHO_REQUEST)
 			return 1;
@@ -298,8 +298,16 @@ int icmp_tap_handler(const struct ctx *c, uint8_t pif, sa_family_t af,
 	ASSERT(flow_proto[pingf->f.type] == proto);
 	pingf->ts = now->tv_sec;
 
-	pif_sockaddr(c, &sa, &sl, PIF_HOST, &tgt->eaddr, 0);
-	if (sendto(pingf->sock, pkt, l4len, MSG_NOSIGNAL, &sa.sa, sl) < 0) {
+	ASSERT(data.off == 0);
+	msh.msg_name = &sa;
+	msh.msg_iov = (struct iovec *)data.iov;
+	msh.msg_iovlen = data.cnt;
+	msh.msg_control = NULL;
+	msh.msg_controllen = 0;
+	msh.msg_flags = 0;
+
+	pif_sockaddr(c, &sa, &msh.msg_namelen, PIF_HOST, &tgt->eaddr, 0);
+	if (sendmsg(pingf->sock, &msh, MSG_NOSIGNAL) < 0) {
 		flow_dbg_perror(pingf, "failed to relay request to socket");
 	} else {
 		flow_dbg(pingf,
-- 
2.49.0


^ permalink raw reply related	[flat|nested] 34+ messages in thread

* [PATCH 09/18] udp: Convert to iov_tail
  2025-04-02 17:23 [PATCH 00/18] Introduce discontiguous frames management Laurent Vivier
                   ` (7 preceding siblings ...)
  2025-04-02 17:23 ` [PATCH 08/18] icmp: " Laurent Vivier
@ 2025-04-02 17:23 ` Laurent Vivier
  2025-04-03  5:11   ` David Gibson
  2025-04-02 17:23 ` [PATCH 10/18] tcp: Convert tcp_tap_handler() to use iov_tail Laurent Vivier
                   ` (8 subsequent siblings)
  17 siblings, 1 reply; 34+ messages in thread
From: Laurent Vivier @ 2025-04-02 17:23 UTC (permalink / raw)
  To: passt-dev; +Cc: Laurent Vivier

Use packet_base() and extract headers using IOV_REMOVE_HEADER()
and IOV_PEEK_HEADER() rather than packet_get().

Signed-off-by: Laurent Vivier <lvivier@redhat.com>
---
 iov.c |  1 -
 udp.c | 37 ++++++++++++++++++++++++++-----------
 2 files changed, 26 insertions(+), 12 deletions(-)

diff --git a/iov.c b/iov.c
index 508fb6da91fb..0c69379316aa 100644
--- a/iov.c
+++ b/iov.c
@@ -173,7 +173,6 @@ size_t iov_size(const struct iovec *iov, size_t iov_cnt)
  * Returns:      The number of elements successfully copied to the destination
  *		 iov array.
  */
-/* cppcheck-suppress unusedFunction */
 unsigned iov_copy(struct iovec *dst_iov, size_t dst_iov_cnt,
 		  const struct iovec *iov, size_t iov_cnt,
 		  size_t offset, size_t bytes)
diff --git a/udp.c b/udp.c
index 80520cbdf188..03906d72e75c 100644
--- a/udp.c
+++ b/udp.c
@@ -858,15 +858,20 @@ int udp_tap_handler(const struct ctx *c, uint8_t pif,
 	struct iovec m[UIO_MAXIOV];
 	const struct udphdr *uh;
 	struct udp_flow *uflow;
-	int i, s, count = 0;
+	int i, j, s, count = 0;
+	struct iov_tail data;
 	flow_sidx_t tosidx;
 	in_port_t src, dst;
+	struct udphdr uhc;
 	uint8_t topif;
 	socklen_t sl;
 
 	ASSERT(!c->no_udp);
 
-	uh = packet_get(p, idx, 0, sizeof(*uh), NULL);
+	if (!packet_base(p, idx, &data))
+		return 1;
+
+	uh = IOV_PEEK_HEADER(&data, uhc);
 	if (!uh)
 		return 1;
 
@@ -903,23 +908,33 @@ int udp_tap_handler(const struct ctx *c, uint8_t pif,
 
 	pif_sockaddr(c, &to_sa, &sl, topif, &toside->eaddr, toside->eport);
 
-	for (i = 0; i < (int)p->count - idx; i++) {
-		struct udphdr *uh_send;
-		size_t len;
+	for (i = 0, j = 0; i < (int)p->count - idx && j < UIO_MAXIOV; i++) {
+		const struct udphdr *uh_send;
+
+		if (!packet_base(p, idx + i, &data))
+			return p->count - idx;
 
-		uh_send = packet_get(p, idx + i, 0, sizeof(*uh), &len);
+		uh_send = IOV_REMOVE_HEADER(&data, uhc);
 		if (!uh_send)
 			return p->count - idx;
 
+		iov_tail_prune(&data);
+
+		if (data.cnt + j >= UIO_MAXIOV)
+			return p->count - idx;
+
 		mm[i].msg_hdr.msg_name = &to_sa;
 		mm[i].msg_hdr.msg_namelen = sl;
 
-		if (len) {
-			m[i].iov_base = (char *)(uh_send + 1);
-			m[i].iov_len = len;
+		if (data.cnt ) {
+			unsigned int len;
+
+			len = iov_copy(&m[j], UIO_MAXIOV - j,
+				       &data.iov[0], data.cnt, data.off, SIZE_MAX);
 
-			mm[i].msg_hdr.msg_iov = m + i;
-			mm[i].msg_hdr.msg_iovlen = 1;
+			mm[i].msg_hdr.msg_iov = &m[j];
+			mm[i].msg_hdr.msg_iovlen = len;
+			j += len;
 		} else {
 			mm[i].msg_hdr.msg_iov = NULL;
 			mm[i].msg_hdr.msg_iovlen = 0;
-- 
@@ -858,15 +858,20 @@ int udp_tap_handler(const struct ctx *c, uint8_t pif,
 	struct iovec m[UIO_MAXIOV];
 	const struct udphdr *uh;
 	struct udp_flow *uflow;
-	int i, s, count = 0;
+	int i, j, s, count = 0;
+	struct iov_tail data;
 	flow_sidx_t tosidx;
 	in_port_t src, dst;
+	struct udphdr uhc;
 	uint8_t topif;
 	socklen_t sl;
 
 	ASSERT(!c->no_udp);
 
-	uh = packet_get(p, idx, 0, sizeof(*uh), NULL);
+	if (!packet_base(p, idx, &data))
+		return 1;
+
+	uh = IOV_PEEK_HEADER(&data, uhc);
 	if (!uh)
 		return 1;
 
@@ -903,23 +908,33 @@ int udp_tap_handler(const struct ctx *c, uint8_t pif,
 
 	pif_sockaddr(c, &to_sa, &sl, topif, &toside->eaddr, toside->eport);
 
-	for (i = 0; i < (int)p->count - idx; i++) {
-		struct udphdr *uh_send;
-		size_t len;
+	for (i = 0, j = 0; i < (int)p->count - idx && j < UIO_MAXIOV; i++) {
+		const struct udphdr *uh_send;
+
+		if (!packet_base(p, idx + i, &data))
+			return p->count - idx;
 
-		uh_send = packet_get(p, idx + i, 0, sizeof(*uh), &len);
+		uh_send = IOV_REMOVE_HEADER(&data, uhc);
 		if (!uh_send)
 			return p->count - idx;
 
+		iov_tail_prune(&data);
+
+		if (data.cnt + j >= UIO_MAXIOV)
+			return p->count - idx;
+
 		mm[i].msg_hdr.msg_name = &to_sa;
 		mm[i].msg_hdr.msg_namelen = sl;
 
-		if (len) {
-			m[i].iov_base = (char *)(uh_send + 1);
-			m[i].iov_len = len;
+		if (data.cnt ) {
+			unsigned int len;
+
+			len = iov_copy(&m[j], UIO_MAXIOV - j,
+				       &data.iov[0], data.cnt, data.off, SIZE_MAX);
 
-			mm[i].msg_hdr.msg_iov = m + i;
-			mm[i].msg_hdr.msg_iovlen = 1;
+			mm[i].msg_hdr.msg_iov = &m[j];
+			mm[i].msg_hdr.msg_iovlen = len;
+			j += len;
 		} else {
 			mm[i].msg_hdr.msg_iov = NULL;
 			mm[i].msg_hdr.msg_iovlen = 0;
-- 
2.49.0


^ permalink raw reply related	[flat|nested] 34+ messages in thread

* [PATCH 10/18] tcp: Convert tcp_tap_handler() to use iov_tail
  2025-04-02 17:23 [PATCH 00/18] Introduce discontiguous frames management Laurent Vivier
                   ` (8 preceding siblings ...)
  2025-04-02 17:23 ` [PATCH 09/18] udp: " Laurent Vivier
@ 2025-04-02 17:23 ` Laurent Vivier
  2025-04-03  5:14   ` David Gibson
  2025-04-02 17:23 ` [PATCH 11/18] tcp: Convert tcp_data_from_tap() " Laurent Vivier
                   ` (7 subsequent siblings)
  17 siblings, 1 reply; 34+ messages in thread
From: Laurent Vivier @ 2025-04-02 17:23 UTC (permalink / raw)
  To: passt-dev; +Cc: Laurent Vivier

Use packet_base() and extract headers using IOV_REMOVE_HEADER()
and iov_peek_header_() rather than packet_get().

Signed-off-by: Laurent Vivier <lvivier@redhat.com>
---
 tcp.c | 17 +++++++++++++----
 1 file changed, 13 insertions(+), 4 deletions(-)

diff --git a/tcp.c b/tcp.c
index a4c840e6721c..790714a08793 100644
--- a/tcp.c
+++ b/tcp.c
@@ -310,6 +310,8 @@
 #include "tcp_buf.h"
 #include "tcp_vu.h"
 
+#define OPTLEN_MAX (((1UL << 4) - 6) * 4UL)
+
 #ifndef __USE_MISC
 /* From Linux UAPI, missing in netinet/tcp.h provided by musl */
 struct tcp_repair_opt {
@@ -1957,7 +1959,10 @@ int tcp_tap_handler(const struct ctx *c, uint8_t pif, sa_family_t af,
 {
 	struct tcp_tap_conn *conn;
 	const struct tcphdr *th;
+	char optsc[OPTLEN_MAX];
+	struct iov_tail data;
 	size_t optlen, len;
+	struct tcphdr thc;
 	const char *opts;
 	union flow *flow;
 	flow_sidx_t sidx;
@@ -1966,15 +1971,19 @@ int tcp_tap_handler(const struct ctx *c, uint8_t pif, sa_family_t af,
 
 	(void)pif;
 
-	th = packet_get(p, idx, 0, sizeof(*th), &len);
+	if (!packet_base(p, idx, &data))
+		return 1;
+
+	len = iov_tail_size(&data);
+
+	th = IOV_REMOVE_HEADER(&data, thc);
 	if (!th)
 		return 1;
-	len += sizeof(*th);
 
 	optlen = th->doff * 4UL - sizeof(*th);
 	/* Static checkers might fail to see this: */
-	optlen = MIN(optlen, ((1UL << 4) /* from doff width */ - 6) * 4UL);
-	opts = packet_get(p, idx, sizeof(*th), optlen, NULL);
+	optlen = MIN(optlen, OPTLEN_MAX);
+	opts = (char *)iov_peek_header_(&data, &optsc[0], optlen, 1);
 
 	sidx = flow_lookup_af(c, IPPROTO_TCP, PIF_TAP, af, saddr, daddr,
 			      ntohs(th->source), ntohs(th->dest));
-- 
@@ -310,6 +310,8 @@
 #include "tcp_buf.h"
 #include "tcp_vu.h"
 
+#define OPTLEN_MAX (((1UL << 4) - 6) * 4UL)
+
 #ifndef __USE_MISC
 /* From Linux UAPI, missing in netinet/tcp.h provided by musl */
 struct tcp_repair_opt {
@@ -1957,7 +1959,10 @@ int tcp_tap_handler(const struct ctx *c, uint8_t pif, sa_family_t af,
 {
 	struct tcp_tap_conn *conn;
 	const struct tcphdr *th;
+	char optsc[OPTLEN_MAX];
+	struct iov_tail data;
 	size_t optlen, len;
+	struct tcphdr thc;
 	const char *opts;
 	union flow *flow;
 	flow_sidx_t sidx;
@@ -1966,15 +1971,19 @@ int tcp_tap_handler(const struct ctx *c, uint8_t pif, sa_family_t af,
 
 	(void)pif;
 
-	th = packet_get(p, idx, 0, sizeof(*th), &len);
+	if (!packet_base(p, idx, &data))
+		return 1;
+
+	len = iov_tail_size(&data);
+
+	th = IOV_REMOVE_HEADER(&data, thc);
 	if (!th)
 		return 1;
-	len += sizeof(*th);
 
 	optlen = th->doff * 4UL - sizeof(*th);
 	/* Static checkers might fail to see this: */
-	optlen = MIN(optlen, ((1UL << 4) /* from doff width */ - 6) * 4UL);
-	opts = packet_get(p, idx, sizeof(*th), optlen, NULL);
+	optlen = MIN(optlen, OPTLEN_MAX);
+	opts = (char *)iov_peek_header_(&data, &optsc[0], optlen, 1);
 
 	sidx = flow_lookup_af(c, IPPROTO_TCP, PIF_TAP, af, saddr, daddr,
 			      ntohs(th->source), ntohs(th->dest));
-- 
2.49.0


^ permalink raw reply related	[flat|nested] 34+ messages in thread

* [PATCH 11/18] tcp: Convert tcp_data_from_tap() to use iov_tail
  2025-04-02 17:23 [PATCH 00/18] Introduce discontiguous frames management Laurent Vivier
                   ` (9 preceding siblings ...)
  2025-04-02 17:23 ` [PATCH 10/18] tcp: Convert tcp_tap_handler() to use iov_tail Laurent Vivier
@ 2025-04-02 17:23 ` Laurent Vivier
  2025-04-03  5:20   ` David Gibson
  2025-04-02 17:23 ` [PATCH 12/18] dhcpv6: Convert to iov_tail Laurent Vivier
                   ` (6 subsequent siblings)
  17 siblings, 1 reply; 34+ messages in thread
From: Laurent Vivier @ 2025-04-02 17:23 UTC (permalink / raw)
  To: passt-dev; +Cc: Laurent Vivier

Use packet_base() and extract headers using IOV_PEEK_HEADER()
rather than packet_get().

Signed-off-by: Laurent Vivier <lvivier@redhat.com>
---
 tcp.c | 24 ++++++++++++++----------
 1 file changed, 14 insertions(+), 10 deletions(-)

diff --git a/tcp.c b/tcp.c
index 790714a08793..a9c04551d9d6 100644
--- a/tcp.c
+++ b/tcp.c
@@ -1643,14 +1643,19 @@ static int tcp_data_from_tap(const struct ctx *c, struct tcp_tap_conn *conn,
 	for (i = idx, iov_i = 0; i < (int)p->count; i++) {
 		uint32_t seq, seq_offset, ack_seq;
 		const struct tcphdr *th;
-		char *data;
+		struct iov_tail data;
+		unsigned int count;
+		struct tcphdr thc;
 		size_t off;
 
-		th = packet_get(p, i, 0, sizeof(*th), &len);
+		if (!packet_base(p, i, &data))
+			return -1;
+
+		th = IOV_PEEK_HEADER(&data, thc);
 		if (!th)
 			return -1;
-		len += sizeof(*th);
 
+		len = iov_tail_size(&data);
 		off = th->doff * 4UL;
 		if (off < sizeof(*th) || off > len)
 			return -1;
@@ -1661,9 +1666,7 @@ static int tcp_data_from_tap(const struct ctx *c, struct tcp_tap_conn *conn,
 		}
 
 		len -= off;
-		data = packet_get(p, i, off, len, NULL);
-		if (!data)
-			continue;
+		data.off = off;
 
 		seq = ntohl(th->seq);
 		if (SEQ_LT(seq, conn->seq_from_tap) && len <= 1) {
@@ -1737,10 +1740,11 @@ static int tcp_data_from_tap(const struct ctx *c, struct tcp_tap_conn *conn,
 			continue;
 		}
 
-		tcp_iov[iov_i].iov_base = data + seq_offset;
-		tcp_iov[iov_i].iov_len = len - seq_offset;
-		seq_from_tap += tcp_iov[iov_i].iov_len;
-		iov_i++;
+		count = iov_copy(&tcp_iov[iov_i], UIO_MAXIOV - iov_i,
+				 &data.iov[0], data.cnt, data.off + seq_offset,
+				 len - seq_offset);
+		seq_from_tap += iov_size(&tcp_iov[iov_i], count);
+		iov_i += count;
 
 		if (keep == i)
 			keep = -1;
-- 
@@ -1643,14 +1643,19 @@ static int tcp_data_from_tap(const struct ctx *c, struct tcp_tap_conn *conn,
 	for (i = idx, iov_i = 0; i < (int)p->count; i++) {
 		uint32_t seq, seq_offset, ack_seq;
 		const struct tcphdr *th;
-		char *data;
+		struct iov_tail data;
+		unsigned int count;
+		struct tcphdr thc;
 		size_t off;
 
-		th = packet_get(p, i, 0, sizeof(*th), &len);
+		if (!packet_base(p, i, &data))
+			return -1;
+
+		th = IOV_PEEK_HEADER(&data, thc);
 		if (!th)
 			return -1;
-		len += sizeof(*th);
 
+		len = iov_tail_size(&data);
 		off = th->doff * 4UL;
 		if (off < sizeof(*th) || off > len)
 			return -1;
@@ -1661,9 +1666,7 @@ static int tcp_data_from_tap(const struct ctx *c, struct tcp_tap_conn *conn,
 		}
 
 		len -= off;
-		data = packet_get(p, i, off, len, NULL);
-		if (!data)
-			continue;
+		data.off = off;
 
 		seq = ntohl(th->seq);
 		if (SEQ_LT(seq, conn->seq_from_tap) && len <= 1) {
@@ -1737,10 +1740,11 @@ static int tcp_data_from_tap(const struct ctx *c, struct tcp_tap_conn *conn,
 			continue;
 		}
 
-		tcp_iov[iov_i].iov_base = data + seq_offset;
-		tcp_iov[iov_i].iov_len = len - seq_offset;
-		seq_from_tap += tcp_iov[iov_i].iov_len;
-		iov_i++;
+		count = iov_copy(&tcp_iov[iov_i], UIO_MAXIOV - iov_i,
+				 &data.iov[0], data.cnt, data.off + seq_offset,
+				 len - seq_offset);
+		seq_from_tap += iov_size(&tcp_iov[iov_i], count);
+		iov_i += count;
 
 		if (keep == i)
 			keep = -1;
-- 
2.49.0


^ permalink raw reply related	[flat|nested] 34+ messages in thread

* [PATCH 12/18] dhcpv6: Convert to iov_tail
  2025-04-02 17:23 [PATCH 00/18] Introduce discontiguous frames management Laurent Vivier
                   ` (10 preceding siblings ...)
  2025-04-02 17:23 ` [PATCH 11/18] tcp: Convert tcp_data_from_tap() " Laurent Vivier
@ 2025-04-02 17:23 ` Laurent Vivier
  2025-04-03  5:21   ` David Gibson
  2025-04-02 17:23 ` [PATCH 13/18] dhcpv6: move offset initialization out of dhcpv6_opt() Laurent Vivier
                   ` (5 subsequent siblings)
  17 siblings, 1 reply; 34+ messages in thread
From: Laurent Vivier @ 2025-04-02 17:23 UTC (permalink / raw)
  To: passt-dev; +Cc: Laurent Vivier

Use packet_base() and extract headers using IOV_REMOVE_HEADER()
and IOV_PEEK_HEADER() rather than packet_get().

Signed-off-by: Laurent Vivier <lvivier@redhat.com>
---
 dhcpv6.c | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/dhcpv6.c b/dhcpv6.c
index 373a98869f9b..eb3f188af0b5 100644
--- a/dhcpv6.c
+++ b/dhcpv6.c
@@ -496,9 +496,15 @@ int dhcpv6(struct ctx *c, const struct pool *p,
 	const struct msg_hdr *mh;
 	const struct udphdr *uh;
 	struct opt_hdr *bad_ia;
+	struct iov_tail data;
+	struct msg_hdr mhc;
+	struct udphdr uhc;
 	size_t mlen, n;
 
-	uh = packet_get(p, 0, 0, sizeof(*uh), &mlen);
+	if (!packet_base(p, 0, &data))
+		return -1;
+
+	uh = IOV_REMOVE_HEADER(&data, uhc);
 	if (!uh)
 		return -1;
 
@@ -511,6 +517,7 @@ int dhcpv6(struct ctx *c, const struct pool *p,
 	if (!IN6_IS_ADDR_MULTICAST(daddr))
 		return -1;
 
+	mlen = iov_tail_size(&data);
 	if (mlen + sizeof(*uh) != ntohs(uh->len) || mlen < sizeof(*mh))
 		return -1;
 
@@ -518,7 +525,7 @@ int dhcpv6(struct ctx *c, const struct pool *p,
 
 	src = &c->ip6.our_tap_ll;
 
-	mh = packet_get(p, 0, sizeof(*uh), sizeof(*mh), NULL);
+	mh = IOV_PEEK_HEADER(&data, mhc);
 	if (!mh)
 		return -1;
 
-- 
@@ -496,9 +496,15 @@ int dhcpv6(struct ctx *c, const struct pool *p,
 	const struct msg_hdr *mh;
 	const struct udphdr *uh;
 	struct opt_hdr *bad_ia;
+	struct iov_tail data;
+	struct msg_hdr mhc;
+	struct udphdr uhc;
 	size_t mlen, n;
 
-	uh = packet_get(p, 0, 0, sizeof(*uh), &mlen);
+	if (!packet_base(p, 0, &data))
+		return -1;
+
+	uh = IOV_REMOVE_HEADER(&data, uhc);
 	if (!uh)
 		return -1;
 
@@ -511,6 +517,7 @@ int dhcpv6(struct ctx *c, const struct pool *p,
 	if (!IN6_IS_ADDR_MULTICAST(daddr))
 		return -1;
 
+	mlen = iov_tail_size(&data);
 	if (mlen + sizeof(*uh) != ntohs(uh->len) || mlen < sizeof(*mh))
 		return -1;
 
@@ -518,7 +525,7 @@ int dhcpv6(struct ctx *c, const struct pool *p,
 
 	src = &c->ip6.our_tap_ll;
 
-	mh = packet_get(p, 0, sizeof(*uh), sizeof(*mh), NULL);
+	mh = IOV_PEEK_HEADER(&data, mhc);
 	if (!mh)
 		return -1;
 
-- 
2.49.0


^ permalink raw reply related	[flat|nested] 34+ messages in thread

* [PATCH 13/18] dhcpv6: move offset initialization out of dhcpv6_opt()
  2025-04-02 17:23 [PATCH 00/18] Introduce discontiguous frames management Laurent Vivier
                   ` (11 preceding siblings ...)
  2025-04-02 17:23 ` [PATCH 12/18] dhcpv6: Convert to iov_tail Laurent Vivier
@ 2025-04-02 17:23 ` Laurent Vivier
  2025-04-03  5:25   ` David Gibson
  2025-04-02 17:23 ` [PATCH 14/18] dhcpv6: Use iov_tail in dhcpv6_opt() Laurent Vivier
                   ` (4 subsequent siblings)
  17 siblings, 1 reply; 34+ messages in thread
From: Laurent Vivier @ 2025-04-02 17:23 UTC (permalink / raw)
  To: passt-dev; +Cc: Laurent Vivier

Signed-off-by: Laurent Vivier <lvivier@redhat.com>
---
 dhcpv6.c | 22 +++++++++++-----------
 1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/dhcpv6.c b/dhcpv6.c
index eb3f188af0b5..ccc64172a480 100644
--- a/dhcpv6.c
+++ b/dhcpv6.c
@@ -54,14 +54,14 @@ struct opt_hdr {
 	uint16_t l;
 } __attribute__((packed));
 
+#define UDP_MSG_HDR_SIZE	(sizeof(struct udphdr) + sizeof(struct msg_hdr))
 # define OPT_SIZE_CONV(x)	(htons_constant(x))
 #define OPT_SIZE(x)		OPT_SIZE_CONV(sizeof(struct opt_##x) -	\
 					      sizeof(struct opt_hdr))
 #define OPT_VSIZE(x)		(sizeof(struct opt_##x) - 		\
 				 sizeof(struct opt_hdr))
 #define OPT_MAX_SIZE		IPV6_MIN_MTU - (sizeof(struct ipv6hdr) + \
-						sizeof(struct udphdr) + \
-						sizeof(struct msg_hdr))
+						UDP_MSG_HDR_SIZE)
 
 /**
  * struct opt_client_id - DHCPv6 Client Identifier option
@@ -290,8 +290,7 @@ static struct opt_hdr *dhcpv6_opt(const struct pool *p, size_t *offset,
 	struct opt_hdr *o;
 	size_t left;
 
-	if (!*offset)
-		*offset = sizeof(struct udphdr) + sizeof(struct msg_hdr);
+	ASSERT(*offset >= UDP_MSG_HDR_SIZE);
 
 	while ((o = packet_get_try(p, 0, *offset, sizeof(*o), &left))) {
 		unsigned int opt_len = ntohs(o->l) + sizeof(*o);
@@ -327,7 +326,7 @@ static struct opt_hdr *dhcpv6_ia_notonlink(const struct pool *p,
 	size_t offset;
 
 	foreach(ia_type, ia_types) {
-		offset = 0;
+		offset = UDP_MSG_HDR_SIZE;
 		while ((ia = dhcpv6_opt(p, &offset, *ia_type))) {
 			if (ntohs(ia->l) < OPT_VSIZE(ia_na))
 				return NULL;
@@ -464,8 +463,9 @@ static size_t dhcpv6_client_fqdn_fill(const struct pool *p, const struct ctx *c,
 
 	o = (struct opt_client_fqdn *)(buf + offset);
 	encode_domain_name(o->domain_name, c->fqdn);
-	req_opt = (struct opt_client_fqdn *)dhcpv6_opt(p, &(size_t){ 0 },
-						       OPT_CLIENT_FQDN);
+	req_opt = (struct opt_client_fqdn *)dhcpv6_opt(p,
+						&(size_t){ UDP_MSG_HDR_SIZE },
+						OPT_CLIENT_FQDN);
 	if (req_opt && req_opt->flags & 0x01 /* S flag */)
 		o->flags = 0x02 /* O flag */;
 	else
@@ -529,15 +529,15 @@ int dhcpv6(struct ctx *c, const struct pool *p,
 	if (!mh)
 		return -1;
 
-	client_id = dhcpv6_opt(p, &(size_t){ 0 }, OPT_CLIENTID);
+	client_id = dhcpv6_opt(p, &(size_t){ UDP_MSG_HDR_SIZE }, OPT_CLIENTID);
 	if (!client_id || ntohs(client_id->l) > OPT_VSIZE(client_id))
 		return -1;
 
-	server_id = dhcpv6_opt(p, &(size_t){ 0 }, OPT_SERVERID);
+	server_id = dhcpv6_opt(p, &(size_t){ UDP_MSG_HDR_SIZE }, OPT_SERVERID);
 	if (server_id && ntohs(server_id->l) != OPT_VSIZE(server_id))
 		return -1;
 
-	ia =        dhcpv6_opt(p, &(size_t){ 0 }, OPT_IA_NA);
+	ia =        dhcpv6_opt(p, &(size_t){ UDP_MSG_HDR_SIZE }, OPT_IA_NA);
 	if (ia && ntohs(ia->l) < MIN(OPT_VSIZE(ia_na), OPT_VSIZE(ia_ta)))
 		return -1;
 
@@ -587,7 +587,7 @@ int dhcpv6(struct ctx *c, const struct pool *p,
 		    memcmp(&resp.server_id, server_id, sizeof(resp.server_id)))
 			return -1;
 
-		if (ia || dhcpv6_opt(p, &(size_t){ 0 }, OPT_IA_TA))
+		if (ia || dhcpv6_opt(p, &(size_t){ UDP_MSG_HDR_SIZE }, OPT_IA_TA))
 			return -1;
 
 		info("DHCPv6: received INFORMATION_REQUEST, sending REPLY");
-- 
@@ -54,14 +54,14 @@ struct opt_hdr {
 	uint16_t l;
 } __attribute__((packed));
 
+#define UDP_MSG_HDR_SIZE	(sizeof(struct udphdr) + sizeof(struct msg_hdr))
 # define OPT_SIZE_CONV(x)	(htons_constant(x))
 #define OPT_SIZE(x)		OPT_SIZE_CONV(sizeof(struct opt_##x) -	\
 					      sizeof(struct opt_hdr))
 #define OPT_VSIZE(x)		(sizeof(struct opt_##x) - 		\
 				 sizeof(struct opt_hdr))
 #define OPT_MAX_SIZE		IPV6_MIN_MTU - (sizeof(struct ipv6hdr) + \
-						sizeof(struct udphdr) + \
-						sizeof(struct msg_hdr))
+						UDP_MSG_HDR_SIZE)
 
 /**
  * struct opt_client_id - DHCPv6 Client Identifier option
@@ -290,8 +290,7 @@ static struct opt_hdr *dhcpv6_opt(const struct pool *p, size_t *offset,
 	struct opt_hdr *o;
 	size_t left;
 
-	if (!*offset)
-		*offset = sizeof(struct udphdr) + sizeof(struct msg_hdr);
+	ASSERT(*offset >= UDP_MSG_HDR_SIZE);
 
 	while ((o = packet_get_try(p, 0, *offset, sizeof(*o), &left))) {
 		unsigned int opt_len = ntohs(o->l) + sizeof(*o);
@@ -327,7 +326,7 @@ static struct opt_hdr *dhcpv6_ia_notonlink(const struct pool *p,
 	size_t offset;
 
 	foreach(ia_type, ia_types) {
-		offset = 0;
+		offset = UDP_MSG_HDR_SIZE;
 		while ((ia = dhcpv6_opt(p, &offset, *ia_type))) {
 			if (ntohs(ia->l) < OPT_VSIZE(ia_na))
 				return NULL;
@@ -464,8 +463,9 @@ static size_t dhcpv6_client_fqdn_fill(const struct pool *p, const struct ctx *c,
 
 	o = (struct opt_client_fqdn *)(buf + offset);
 	encode_domain_name(o->domain_name, c->fqdn);
-	req_opt = (struct opt_client_fqdn *)dhcpv6_opt(p, &(size_t){ 0 },
-						       OPT_CLIENT_FQDN);
+	req_opt = (struct opt_client_fqdn *)dhcpv6_opt(p,
+						&(size_t){ UDP_MSG_HDR_SIZE },
+						OPT_CLIENT_FQDN);
 	if (req_opt && req_opt->flags & 0x01 /* S flag */)
 		o->flags = 0x02 /* O flag */;
 	else
@@ -529,15 +529,15 @@ int dhcpv6(struct ctx *c, const struct pool *p,
 	if (!mh)
 		return -1;
 
-	client_id = dhcpv6_opt(p, &(size_t){ 0 }, OPT_CLIENTID);
+	client_id = dhcpv6_opt(p, &(size_t){ UDP_MSG_HDR_SIZE }, OPT_CLIENTID);
 	if (!client_id || ntohs(client_id->l) > OPT_VSIZE(client_id))
 		return -1;
 
-	server_id = dhcpv6_opt(p, &(size_t){ 0 }, OPT_SERVERID);
+	server_id = dhcpv6_opt(p, &(size_t){ UDP_MSG_HDR_SIZE }, OPT_SERVERID);
 	if (server_id && ntohs(server_id->l) != OPT_VSIZE(server_id))
 		return -1;
 
-	ia =        dhcpv6_opt(p, &(size_t){ 0 }, OPT_IA_NA);
+	ia =        dhcpv6_opt(p, &(size_t){ UDP_MSG_HDR_SIZE }, OPT_IA_NA);
 	if (ia && ntohs(ia->l) < MIN(OPT_VSIZE(ia_na), OPT_VSIZE(ia_ta)))
 		return -1;
 
@@ -587,7 +587,7 @@ int dhcpv6(struct ctx *c, const struct pool *p,
 		    memcmp(&resp.server_id, server_id, sizeof(resp.server_id)))
 			return -1;
 
-		if (ia || dhcpv6_opt(p, &(size_t){ 0 }, OPT_IA_TA))
+		if (ia || dhcpv6_opt(p, &(size_t){ UDP_MSG_HDR_SIZE }, OPT_IA_TA))
 			return -1;
 
 		info("DHCPv6: received INFORMATION_REQUEST, sending REPLY");
-- 
2.49.0


^ permalink raw reply related	[flat|nested] 34+ messages in thread

* [PATCH 14/18] dhcpv6: Use iov_tail in dhcpv6_opt()
  2025-04-02 17:23 [PATCH 00/18] Introduce discontiguous frames management Laurent Vivier
                   ` (12 preceding siblings ...)
  2025-04-02 17:23 ` [PATCH 13/18] dhcpv6: move offset initialization out of dhcpv6_opt() Laurent Vivier
@ 2025-04-02 17:23 ` Laurent Vivier
  2025-04-03  5:37   ` David Gibson
  2025-04-02 17:23 ` [PATCH 15/18] dhcp: Convert to iov_tail Laurent Vivier
                   ` (3 subsequent siblings)
  17 siblings, 1 reply; 34+ messages in thread
From: Laurent Vivier @ 2025-04-02 17:23 UTC (permalink / raw)
  To: passt-dev; +Cc: Laurent Vivier

Signed-off-by: Laurent Vivier <lvivier@redhat.com>
---
 dhcpv6.c | 57 +++++++++++++++++++++++++++-----------------------------
 1 file changed, 27 insertions(+), 30 deletions(-)

diff --git a/dhcpv6.c b/dhcpv6.c
index ccc64172a480..1e83f2c2ad23 100644
--- a/dhcpv6.c
+++ b/dhcpv6.c
@@ -278,30 +278,25 @@ static struct resp_not_on_link_t {
 
 /**
  * dhcpv6_opt() - Get option from DHCPv6 message
- * @p:		Packet pool, single packet with UDP header
- * @offset:	Offset to look at, 0: end of header, set to option start
+ * @data:	Data to look at
  * @type:	Option type to look up, network order
  *
  * Return: pointer to option header, or NULL on malformed or missing option
  */
-static struct opt_hdr *dhcpv6_opt(const struct pool *p, size_t *offset,
-				  uint16_t type)
+static struct opt_hdr *dhcpv6_opt(struct iov_tail *data, uint16_t type)
 {
-	struct opt_hdr *o;
-	size_t left;
+	struct opt_hdr *o, oc;
 
-	ASSERT(*offset >= UDP_MSG_HDR_SIZE);
-
-	while ((o = packet_get_try(p, 0, *offset, sizeof(*o), &left))) {
+	while ((o = IOV_PEEK_HEADER(data, oc))) {
 		unsigned int opt_len = ntohs(o->l) + sizeof(*o);
 
-		if (ntohs(o->l) > left)
+		if (opt_len > iov_tail_size(data))
 			return NULL;
 
 		if (o->t == type)
 			return o;
 
-		*offset += opt_len;
+		data->off += opt_len;
 	}
 
 	return NULL;
@@ -309,31 +304,31 @@ static struct opt_hdr *dhcpv6_opt(const struct pool *p, size_t *offset,
 
 /**
  * dhcpv6_ia_notonlink() - Check if any IA contains non-appropriate addresses
- * @p:		Packet pool, single packet starting from UDP header
+ * @data:	Data to look at, packet starting from UDP header
  * @la:		Address we want to lease to the client
  *
  * Return: pointer to non-appropriate IA_NA or IA_TA, if any, NULL otherwise
  */
-static struct opt_hdr *dhcpv6_ia_notonlink(const struct pool *p,
+static struct opt_hdr *dhcpv6_ia_notonlink(const struct iov_tail *data,
 					   struct in6_addr *la)
 {
 	int ia_types[2] = { OPT_IA_NA, OPT_IA_TA }, *ia_type;
 	const struct opt_ia_addr *opt_addr;
 	char buf[INET6_ADDRSTRLEN];
 	struct in6_addr req_addr;
+	struct iov_tail current;
 	const struct opt_hdr *h;
 	struct opt_hdr *ia;
-	size_t offset;
 
 	foreach(ia_type, ia_types) {
-		offset = UDP_MSG_HDR_SIZE;
-		while ((ia = dhcpv6_opt(p, &offset, *ia_type))) {
+		current = *data;
+		while ((ia = dhcpv6_opt(&current, *ia_type))) {
 			if (ntohs(ia->l) < OPT_VSIZE(ia_na))
 				return NULL;
 
-			offset += sizeof(struct opt_ia_na);
+			current.off += sizeof(struct opt_ia_na);
 
-			while ((h = dhcpv6_opt(p, &offset, OPT_IAAADR))) {
+			while ((h = dhcpv6_opt(&current, OPT_IAAADR))) {
 				if (ntohs(h->l) != OPT_VSIZE(ia_addr))
 					return NULL;
 
@@ -342,7 +337,7 @@ static struct opt_hdr *dhcpv6_ia_notonlink(const struct pool *p,
 				if (!IN6_ARE_ADDR_EQUAL(la, &req_addr))
 					goto err;
 
-				offset += sizeof(struct opt_ia_addr);
+				current.off += sizeof(struct opt_ia_addr);
 			}
 		}
 	}
@@ -434,13 +429,15 @@ search:
 
 /**
  * dhcpv6_client_fqdn_fill() - Fill in client FQDN option
+ * @data:	Data to look at
  * @c:		Execution context
  * @buf:	Response message buffer where options will be appended
  * @offset:	Offset in message buffer for new options
  *
  * Return: updated length of response message buffer.
  */
-static size_t dhcpv6_client_fqdn_fill(const struct pool *p, const struct ctx *c,
+static size_t dhcpv6_client_fqdn_fill(struct iov_tail *data,
+				      const struct ctx *c,
 				      char *buf, int offset)
 
 {
@@ -463,9 +460,8 @@ static size_t dhcpv6_client_fqdn_fill(const struct pool *p, const struct ctx *c,
 
 	o = (struct opt_client_fqdn *)(buf + offset);
 	encode_domain_name(o->domain_name, c->fqdn);
-	req_opt = (struct opt_client_fqdn *)dhcpv6_opt(p,
-						&(size_t){ UDP_MSG_HDR_SIZE },
-						OPT_CLIENT_FQDN);
+	data->off += UDP_MSG_HDR_SIZE;
+	req_opt = (struct opt_client_fqdn *)dhcpv6_opt(data, OPT_CLIENT_FQDN);
 	if (req_opt && req_opt->flags & 0x01 /* S flag */)
 		o->flags = 0x02 /* O flag */;
 	else
@@ -525,19 +521,19 @@ int dhcpv6(struct ctx *c, const struct pool *p,
 
 	src = &c->ip6.our_tap_ll;
 
-	mh = IOV_PEEK_HEADER(&data, mhc);
+	mh = IOV_REMOVE_HEADER(&data, mhc);
 	if (!mh)
 		return -1;
 
-	client_id = dhcpv6_opt(p, &(size_t){ UDP_MSG_HDR_SIZE }, OPT_CLIENTID);
+	client_id = dhcpv6_opt(&data, OPT_CLIENTID);
 	if (!client_id || ntohs(client_id->l) > OPT_VSIZE(client_id))
 		return -1;
 
-	server_id = dhcpv6_opt(p, &(size_t){ UDP_MSG_HDR_SIZE }, OPT_SERVERID);
+	server_id = dhcpv6_opt(&data, OPT_SERVERID);
 	if (server_id && ntohs(server_id->l) != OPT_VSIZE(server_id))
 		return -1;
 
-	ia =        dhcpv6_opt(p, &(size_t){ UDP_MSG_HDR_SIZE }, OPT_IA_NA);
+	ia =        dhcpv6_opt(&data, OPT_IA_NA);
 	if (ia && ntohs(ia->l) < MIN(OPT_VSIZE(ia_na), OPT_VSIZE(ia_ta)))
 		return -1;
 
@@ -553,7 +549,7 @@ int dhcpv6(struct ctx *c, const struct pool *p,
 		if (mh->type == TYPE_CONFIRM && server_id)
 			return -1;
 
-		if ((bad_ia = dhcpv6_ia_notonlink(p, &c->ip6.addr))) {
+		if ((bad_ia = dhcpv6_ia_notonlink(&data, &c->ip6.addr))) {
 			info("DHCPv6: received CONFIRM with inappropriate IA,"
 			     " sending NotOnLink status in REPLY");
 
@@ -587,7 +583,7 @@ int dhcpv6(struct ctx *c, const struct pool *p,
 		    memcmp(&resp.server_id, server_id, sizeof(resp.server_id)))
 			return -1;
 
-		if (ia || dhcpv6_opt(p, &(size_t){ UDP_MSG_HDR_SIZE }, OPT_IA_TA))
+		if (ia || dhcpv6_opt(&data, OPT_IA_TA))
 			return -1;
 
 		info("DHCPv6: received INFORMATION_REQUEST, sending REPLY");
@@ -619,7 +615,8 @@ int dhcpv6(struct ctx *c, const struct pool *p,
 	n = offsetof(struct resp_t, client_id) +
 	    sizeof(struct opt_hdr) + ntohs(client_id->l);
 	n = dhcpv6_dns_fill(c, (char *)&resp, n);
-	n = dhcpv6_client_fqdn_fill(p, c, (char *)&resp, n);
+	packet_base(p, 0, &data);
+	n = dhcpv6_client_fqdn_fill(&data, c, (char *)&resp, n);
 
 	resp.hdr.xid = mh->xid;
 
-- 
@@ -278,30 +278,25 @@ static struct resp_not_on_link_t {
 
 /**
  * dhcpv6_opt() - Get option from DHCPv6 message
- * @p:		Packet pool, single packet with UDP header
- * @offset:	Offset to look at, 0: end of header, set to option start
+ * @data:	Data to look at
  * @type:	Option type to look up, network order
  *
  * Return: pointer to option header, or NULL on malformed or missing option
  */
-static struct opt_hdr *dhcpv6_opt(const struct pool *p, size_t *offset,
-				  uint16_t type)
+static struct opt_hdr *dhcpv6_opt(struct iov_tail *data, uint16_t type)
 {
-	struct opt_hdr *o;
-	size_t left;
+	struct opt_hdr *o, oc;
 
-	ASSERT(*offset >= UDP_MSG_HDR_SIZE);
-
-	while ((o = packet_get_try(p, 0, *offset, sizeof(*o), &left))) {
+	while ((o = IOV_PEEK_HEADER(data, oc))) {
 		unsigned int opt_len = ntohs(o->l) + sizeof(*o);
 
-		if (ntohs(o->l) > left)
+		if (opt_len > iov_tail_size(data))
 			return NULL;
 
 		if (o->t == type)
 			return o;
 
-		*offset += opt_len;
+		data->off += opt_len;
 	}
 
 	return NULL;
@@ -309,31 +304,31 @@ static struct opt_hdr *dhcpv6_opt(const struct pool *p, size_t *offset,
 
 /**
  * dhcpv6_ia_notonlink() - Check if any IA contains non-appropriate addresses
- * @p:		Packet pool, single packet starting from UDP header
+ * @data:	Data to look at, packet starting from UDP header
  * @la:		Address we want to lease to the client
  *
  * Return: pointer to non-appropriate IA_NA or IA_TA, if any, NULL otherwise
  */
-static struct opt_hdr *dhcpv6_ia_notonlink(const struct pool *p,
+static struct opt_hdr *dhcpv6_ia_notonlink(const struct iov_tail *data,
 					   struct in6_addr *la)
 {
 	int ia_types[2] = { OPT_IA_NA, OPT_IA_TA }, *ia_type;
 	const struct opt_ia_addr *opt_addr;
 	char buf[INET6_ADDRSTRLEN];
 	struct in6_addr req_addr;
+	struct iov_tail current;
 	const struct opt_hdr *h;
 	struct opt_hdr *ia;
-	size_t offset;
 
 	foreach(ia_type, ia_types) {
-		offset = UDP_MSG_HDR_SIZE;
-		while ((ia = dhcpv6_opt(p, &offset, *ia_type))) {
+		current = *data;
+		while ((ia = dhcpv6_opt(&current, *ia_type))) {
 			if (ntohs(ia->l) < OPT_VSIZE(ia_na))
 				return NULL;
 
-			offset += sizeof(struct opt_ia_na);
+			current.off += sizeof(struct opt_ia_na);
 
-			while ((h = dhcpv6_opt(p, &offset, OPT_IAAADR))) {
+			while ((h = dhcpv6_opt(&current, OPT_IAAADR))) {
 				if (ntohs(h->l) != OPT_VSIZE(ia_addr))
 					return NULL;
 
@@ -342,7 +337,7 @@ static struct opt_hdr *dhcpv6_ia_notonlink(const struct pool *p,
 				if (!IN6_ARE_ADDR_EQUAL(la, &req_addr))
 					goto err;
 
-				offset += sizeof(struct opt_ia_addr);
+				current.off += sizeof(struct opt_ia_addr);
 			}
 		}
 	}
@@ -434,13 +429,15 @@ search:
 
 /**
  * dhcpv6_client_fqdn_fill() - Fill in client FQDN option
+ * @data:	Data to look at
  * @c:		Execution context
  * @buf:	Response message buffer where options will be appended
  * @offset:	Offset in message buffer for new options
  *
  * Return: updated length of response message buffer.
  */
-static size_t dhcpv6_client_fqdn_fill(const struct pool *p, const struct ctx *c,
+static size_t dhcpv6_client_fqdn_fill(struct iov_tail *data,
+				      const struct ctx *c,
 				      char *buf, int offset)
 
 {
@@ -463,9 +460,8 @@ static size_t dhcpv6_client_fqdn_fill(const struct pool *p, const struct ctx *c,
 
 	o = (struct opt_client_fqdn *)(buf + offset);
 	encode_domain_name(o->domain_name, c->fqdn);
-	req_opt = (struct opt_client_fqdn *)dhcpv6_opt(p,
-						&(size_t){ UDP_MSG_HDR_SIZE },
-						OPT_CLIENT_FQDN);
+	data->off += UDP_MSG_HDR_SIZE;
+	req_opt = (struct opt_client_fqdn *)dhcpv6_opt(data, OPT_CLIENT_FQDN);
 	if (req_opt && req_opt->flags & 0x01 /* S flag */)
 		o->flags = 0x02 /* O flag */;
 	else
@@ -525,19 +521,19 @@ int dhcpv6(struct ctx *c, const struct pool *p,
 
 	src = &c->ip6.our_tap_ll;
 
-	mh = IOV_PEEK_HEADER(&data, mhc);
+	mh = IOV_REMOVE_HEADER(&data, mhc);
 	if (!mh)
 		return -1;
 
-	client_id = dhcpv6_opt(p, &(size_t){ UDP_MSG_HDR_SIZE }, OPT_CLIENTID);
+	client_id = dhcpv6_opt(&data, OPT_CLIENTID);
 	if (!client_id || ntohs(client_id->l) > OPT_VSIZE(client_id))
 		return -1;
 
-	server_id = dhcpv6_opt(p, &(size_t){ UDP_MSG_HDR_SIZE }, OPT_SERVERID);
+	server_id = dhcpv6_opt(&data, OPT_SERVERID);
 	if (server_id && ntohs(server_id->l) != OPT_VSIZE(server_id))
 		return -1;
 
-	ia =        dhcpv6_opt(p, &(size_t){ UDP_MSG_HDR_SIZE }, OPT_IA_NA);
+	ia =        dhcpv6_opt(&data, OPT_IA_NA);
 	if (ia && ntohs(ia->l) < MIN(OPT_VSIZE(ia_na), OPT_VSIZE(ia_ta)))
 		return -1;
 
@@ -553,7 +549,7 @@ int dhcpv6(struct ctx *c, const struct pool *p,
 		if (mh->type == TYPE_CONFIRM && server_id)
 			return -1;
 
-		if ((bad_ia = dhcpv6_ia_notonlink(p, &c->ip6.addr))) {
+		if ((bad_ia = dhcpv6_ia_notonlink(&data, &c->ip6.addr))) {
 			info("DHCPv6: received CONFIRM with inappropriate IA,"
 			     " sending NotOnLink status in REPLY");
 
@@ -587,7 +583,7 @@ int dhcpv6(struct ctx *c, const struct pool *p,
 		    memcmp(&resp.server_id, server_id, sizeof(resp.server_id)))
 			return -1;
 
-		if (ia || dhcpv6_opt(p, &(size_t){ UDP_MSG_HDR_SIZE }, OPT_IA_TA))
+		if (ia || dhcpv6_opt(&data, OPT_IA_TA))
 			return -1;
 
 		info("DHCPv6: received INFORMATION_REQUEST, sending REPLY");
@@ -619,7 +615,8 @@ int dhcpv6(struct ctx *c, const struct pool *p,
 	n = offsetof(struct resp_t, client_id) +
 	    sizeof(struct opt_hdr) + ntohs(client_id->l);
 	n = dhcpv6_dns_fill(c, (char *)&resp, n);
-	n = dhcpv6_client_fqdn_fill(p, c, (char *)&resp, n);
+	packet_base(p, 0, &data);
+	n = dhcpv6_client_fqdn_fill(&data, c, (char *)&resp, n);
 
 	resp.hdr.xid = mh->xid;
 
-- 
2.49.0


^ permalink raw reply related	[flat|nested] 34+ messages in thread

* [PATCH 15/18] dhcp: Convert to iov_tail
  2025-04-02 17:23 [PATCH 00/18] Introduce discontiguous frames management Laurent Vivier
                   ` (13 preceding siblings ...)
  2025-04-02 17:23 ` [PATCH 14/18] dhcpv6: Use iov_tail in dhcpv6_opt() Laurent Vivier
@ 2025-04-02 17:23 ` Laurent Vivier
  2025-04-03  5:47   ` David Gibson
  2025-04-02 17:23 ` [PATCH 16/18] tap: " Laurent Vivier
                   ` (2 subsequent siblings)
  17 siblings, 1 reply; 34+ messages in thread
From: Laurent Vivier @ 2025-04-02 17:23 UTC (permalink / raw)
  To: passt-dev; +Cc: Laurent Vivier

Use packet_base() and extract headers using IOV_REMOVE_HEADER()
and IOV_PEEK_HEADER() rather than packet_get().

Signed-off-by: Laurent Vivier <lvivier@redhat.com>
---
 dhcp.c | 38 ++++++++++++++++++++++----------------
 iov.c  |  1 -
 2 files changed, 22 insertions(+), 17 deletions(-)

diff --git a/dhcp.c b/dhcp.c
index b0de04be6f27..f3167beb18b7 100644
--- a/dhcp.c
+++ b/dhcp.c
@@ -302,27 +302,32 @@ static void opt_set_dns_search(const struct ctx *c, size_t max_len)
  */
 int dhcp(const struct ctx *c, const struct pool *p)
 {
-	size_t mlen, dlen, offset = 0, opt_len, opt_off = 0;
+	size_t mlen, dlen, opt_len, opt_off = 0;
 	char macstr[ETH_ADDRSTRLEN];
 	struct in_addr mask, dst;
 	const struct ethhdr *eh;
 	const struct iphdr *iph;
 	const struct udphdr *uh;
+	struct iov_tail data;
 	struct msg const *m;
+	struct msg mc;
+	struct ethhdr ehc;
+	struct iphdr iphc;
+	struct udphdr uhc;
 	struct msg reply;
 	unsigned int i;
 
-	eh  = packet_get(p, 0, offset, sizeof(*eh),  NULL);
-	offset += sizeof(*eh);
+	if (!packet_base(p, 0, &data))
+		return -1;
 
-	iph = packet_get(p, 0, offset, sizeof(*iph), NULL);
+	eh = IOV_REMOVE_HEADER(&data, ehc);
+	iph = IOV_PEEK_HEADER(&data, iphc);
 	if (!eh || !iph)
 		return -1;
 
-	offset += iph->ihl * 4UL;
-	uh  = packet_get(p, 0, offset, sizeof(*uh),  &mlen);
-	offset += sizeof(*uh);
+	data.off += iph->ihl * 4UL;
 
+	uh = IOV_REMOVE_HEADER(&data, uhc);
 	if (!uh)
 		return -1;
 
@@ -332,7 +337,9 @@ int dhcp(const struct ctx *c, const struct pool *p)
 	if (c->no_dhcp)
 		return 1;
 
-	m   = packet_get(p, 0, offset, offsetof(struct msg, o), &opt_len);
+	mlen = iov_tail_size(&data);
+	m = (struct msg const *)iov_remove_header_(&data, &mc, offsetof(struct msg, o),
+						   __alignof__(struct msg));
 	if (!m						||
 	    mlen  != ntohs(uh->len) - sizeof(*uh)	||
 	    mlen  <  offsetof(struct msg, o)		||
@@ -355,25 +362,24 @@ int dhcp(const struct ctx *c, const struct pool *p)
 	memset(&reply.file,	0,		sizeof(reply.file));
 	reply.magic		= m->magic;
 
-	offset += offsetof(struct msg, o);
-
 	for (i = 0; i < ARRAY_SIZE(opts); i++)
 		opts[i].clen = -1;
 
+	opt_len = iov_tail_size(&data);
 	while (opt_off + 2 < opt_len) {
-		const uint8_t *olen, *val;
+		const uint8_t *olen;
 		uint8_t *type;
 
-		type = packet_get(p, 0, offset + opt_off,	1,	NULL);
-		olen = packet_get(p, 0, offset + opt_off + 1,	1,	NULL);
+		type = iov_tail_ptr(&data, opt_off, 1);
+		olen = iov_tail_ptr(&data, opt_off + 1, 1);
 		if (!type || !olen)
 			return -1;
 
-		val =  packet_get(p, 0, offset + opt_off + 2,	*olen,	NULL);
-		if (!val)
+		if (iov_tail_size(&data) < opt_off + 2 + *olen)
 			return -1;
 
-		memcpy(&opts[*type].c, val, *olen);
+		iov_to_buf(&data.iov[0], data.cnt, data.off + opt_off + 2,
+			   &opts[*type].c, *olen);
 		opts[*type].clen = *olen;
 		opt_off += *olen + 2;
 	}
diff --git a/iov.c b/iov.c
index 0c69379316aa..66f15585a61a 100644
--- a/iov.c
+++ b/iov.c
@@ -253,7 +253,6 @@ bool iov_drop(struct iov_tail *tail, size_t len)
  * Returns:	pointer to the data in the tail, NULL if the
  *		tail doesn't contains @len bytes.
  */
-/* cppcheck-suppress unusedFunction */
 void *iov_tail_ptr(struct iov_tail *tail, size_t off, size_t len)
 {
 	const struct iovec *iov;
-- 
@@ -253,7 +253,6 @@ bool iov_drop(struct iov_tail *tail, size_t len)
  * Returns:	pointer to the data in the tail, NULL if the
  *		tail doesn't contains @len bytes.
  */
-/* cppcheck-suppress unusedFunction */
 void *iov_tail_ptr(struct iov_tail *tail, size_t off, size_t len)
 {
 	const struct iovec *iov;
-- 
2.49.0


^ permalink raw reply related	[flat|nested] 34+ messages in thread

* [PATCH 16/18] tap: Convert to iov_tail
  2025-04-02 17:23 [PATCH 00/18] Introduce discontiguous frames management Laurent Vivier
                   ` (14 preceding siblings ...)
  2025-04-02 17:23 ` [PATCH 15/18] dhcp: Convert to iov_tail Laurent Vivier
@ 2025-04-02 17:23 ` Laurent Vivier
  2025-04-02 17:23 ` [PATCH 17/18] ip: Use iov_tail in ipv6_l4hdr() Laurent Vivier
  2025-04-02 17:23 ` [PATCH 18/18] tap: Convert to iov_tail Laurent Vivier
  17 siblings, 0 replies; 34+ messages in thread
From: Laurent Vivier @ 2025-04-02 17:23 UTC (permalink / raw)
  To: passt-dev; +Cc: Laurent Vivier

Use packet_base() and extract headers using IOV_PEEK_HEADER()
rather than packet_get().

Signed-off-by: Laurent Vivier <lvivier@redhat.com>
---
 tap.c | 31 ++++++++++++++++++-------------
 1 file changed, 18 insertions(+), 13 deletions(-)

diff --git a/tap.c b/tap.c
index 4b54807c4101..bb4e23df226e 100644
--- a/tap.c
+++ b/tap.c
@@ -680,28 +680,33 @@ static int tap4_handler(struct ctx *c, const struct pool *in,
 	i = 0;
 resume:
 	for (seq_count = 0, seq = NULL; i < in->count; i++) {
-		size_t l2len, l3len, hlen, l4len;
+		size_t l3len, hlen, l4len;
 		const struct ethhdr *eh;
 		const struct udphdr *uh;
 		struct iov_tail data;
+		struct ethhdr ehc;
 		struct iphdr *iph;
-		const char *l4h;
+		struct iphdr iphc;
+		struct udphdr uhc;
 
-		packet_get(in, i, 0, 0, &l2len);
+		if (!packet_base(in, i, &data))
+			continue;
 
-		eh = packet_get(in, i, 0, sizeof(*eh), &l3len);
+		eh = IOV_PEEK_HEADER(&data, ehc);
 		if (!eh)
 			continue;
 		if (ntohs(eh->h_proto) == ETH_P_ARP) {
 			PACKET_POOL_P(pkt, 1, in->buf, in->buf_size);
 
-			data = IOV_TAIL_FROM_BUF((void *)eh, l2len, 0);
 			packet_add(pkt, &data);
 			arp(c, pkt);
 			continue;
 		}
 
-		iph = packet_get(in, i, sizeof(*eh), sizeof(*iph), NULL);
+		data.off += sizeof(*eh);
+		l3len = iov_tail_size(&data);
+
+		iph = IOV_PEEK_HEADER(&data, iphc);
 		if (!iph)
 			continue;
 
@@ -729,8 +734,8 @@ resume:
 		if (iph->saddr && c->ip4.addr_seen.s_addr != iph->saddr)
 			c->ip4.addr_seen.s_addr = iph->saddr;
 
-		l4h = packet_get(in, i, sizeof(*eh) + hlen, l4len, NULL);
-		if (!l4h)
+		data.off += hlen;
+		if (iov_tail_size(&data) != l4len)
 			continue;
 
 		if (iph->protocol == IPPROTO_ICMP) {
@@ -741,7 +746,6 @@ resume:
 
 			tap_packet_debug(iph, NULL, NULL, 0, NULL, 1);
 
-			data = IOV_TAIL_FROM_BUF((void *)l4h, l4len, 0);
 			packet_add(pkt, &data);
 			icmp_tap_handler(c, PIF_TAP, AF_INET,
 					 &iph->saddr, &iph->daddr,
@@ -749,15 +753,17 @@ resume:
 			continue;
 		}
 
-		uh = packet_get(in, i, sizeof(*eh) + hlen, sizeof(*uh), NULL);
+		uh = IOV_PEEK_HEADER(&data, uhc);
 		if (!uh)
 			continue;
 
 		if (iph->protocol == IPPROTO_UDP) {
+			struct iov_tail eh_data;
+
 			PACKET_POOL_P(pkt, 1, in->buf, in->buf_size);
 
-			data = IOV_TAIL_FROM_BUF((void *)eh, l2len, 0);
-			packet_add(pkt, &data);
+			packet_base(in, i, &eh_data);
+			packet_add(pkt, &eh_data);
 			if (dhcp(c, pkt))
 				continue;
 		}
@@ -806,7 +812,6 @@ resume:
 #undef L4_SET
 
 append:
-		data = IOV_TAIL_FROM_BUF((void *)l4h, l4len, 0);
 		packet_add((struct pool *)&seq->p, &data);
 	}
 
-- 
@@ -680,28 +680,33 @@ static int tap4_handler(struct ctx *c, const struct pool *in,
 	i = 0;
 resume:
 	for (seq_count = 0, seq = NULL; i < in->count; i++) {
-		size_t l2len, l3len, hlen, l4len;
+		size_t l3len, hlen, l4len;
 		const struct ethhdr *eh;
 		const struct udphdr *uh;
 		struct iov_tail data;
+		struct ethhdr ehc;
 		struct iphdr *iph;
-		const char *l4h;
+		struct iphdr iphc;
+		struct udphdr uhc;
 
-		packet_get(in, i, 0, 0, &l2len);
+		if (!packet_base(in, i, &data))
+			continue;
 
-		eh = packet_get(in, i, 0, sizeof(*eh), &l3len);
+		eh = IOV_PEEK_HEADER(&data, ehc);
 		if (!eh)
 			continue;
 		if (ntohs(eh->h_proto) == ETH_P_ARP) {
 			PACKET_POOL_P(pkt, 1, in->buf, in->buf_size);
 
-			data = IOV_TAIL_FROM_BUF((void *)eh, l2len, 0);
 			packet_add(pkt, &data);
 			arp(c, pkt);
 			continue;
 		}
 
-		iph = packet_get(in, i, sizeof(*eh), sizeof(*iph), NULL);
+		data.off += sizeof(*eh);
+		l3len = iov_tail_size(&data);
+
+		iph = IOV_PEEK_HEADER(&data, iphc);
 		if (!iph)
 			continue;
 
@@ -729,8 +734,8 @@ resume:
 		if (iph->saddr && c->ip4.addr_seen.s_addr != iph->saddr)
 			c->ip4.addr_seen.s_addr = iph->saddr;
 
-		l4h = packet_get(in, i, sizeof(*eh) + hlen, l4len, NULL);
-		if (!l4h)
+		data.off += hlen;
+		if (iov_tail_size(&data) != l4len)
 			continue;
 
 		if (iph->protocol == IPPROTO_ICMP) {
@@ -741,7 +746,6 @@ resume:
 
 			tap_packet_debug(iph, NULL, NULL, 0, NULL, 1);
 
-			data = IOV_TAIL_FROM_BUF((void *)l4h, l4len, 0);
 			packet_add(pkt, &data);
 			icmp_tap_handler(c, PIF_TAP, AF_INET,
 					 &iph->saddr, &iph->daddr,
@@ -749,15 +753,17 @@ resume:
 			continue;
 		}
 
-		uh = packet_get(in, i, sizeof(*eh) + hlen, sizeof(*uh), NULL);
+		uh = IOV_PEEK_HEADER(&data, uhc);
 		if (!uh)
 			continue;
 
 		if (iph->protocol == IPPROTO_UDP) {
+			struct iov_tail eh_data;
+
 			PACKET_POOL_P(pkt, 1, in->buf, in->buf_size);
 
-			data = IOV_TAIL_FROM_BUF((void *)eh, l2len, 0);
-			packet_add(pkt, &data);
+			packet_base(in, i, &eh_data);
+			packet_add(pkt, &eh_data);
 			if (dhcp(c, pkt))
 				continue;
 		}
@@ -806,7 +812,6 @@ resume:
 #undef L4_SET
 
 append:
-		data = IOV_TAIL_FROM_BUF((void *)l4h, l4len, 0);
 		packet_add((struct pool *)&seq->p, &data);
 	}
 
-- 
2.49.0


^ permalink raw reply related	[flat|nested] 34+ messages in thread

* [PATCH 17/18] ip: Use iov_tail in ipv6_l4hdr()
  2025-04-02 17:23 [PATCH 00/18] Introduce discontiguous frames management Laurent Vivier
                   ` (15 preceding siblings ...)
  2025-04-02 17:23 ` [PATCH 16/18] tap: " Laurent Vivier
@ 2025-04-02 17:23 ` Laurent Vivier
  2025-04-02 17:23 ` [PATCH 18/18] tap: Convert to iov_tail Laurent Vivier
  17 siblings, 0 replies; 34+ messages in thread
From: Laurent Vivier @ 2025-04-02 17:23 UTC (permalink / raw)
  To: passt-dev; +Cc: Laurent Vivier

Use packet_base() and extract headers using IOV_REMOVE_HEADER()
and IOV_PEEK_HEADER() rather than packet_get().

Signed-off-by: Laurent Vivier <lvivier@redhat.com>
---
 ip.c  | 27 ++++++++++++---------------
 ip.h  |  3 +--
 tap.c |  4 +++-
 3 files changed, 16 insertions(+), 18 deletions(-)

diff --git a/ip.c b/ip.c
index 2cc7f6548aff..f13aae08f918 100644
--- a/ip.c
+++ b/ip.c
@@ -23,40 +23,37 @@
 
 /**
  * ipv6_l4hdr() - Find pointer to L4 header in IPv6 packet and extract protocol
- * @p:		Packet pool, packet number @idx has IPv6 header at @offset
- * @idx:	Index of packet in pool
- * @offset:	Pre-calculated IPv6 header offset
+ * @data:	IPv6 packet
  * @proto:	Filled with L4 protocol number
  * @dlen:	Data length (payload excluding header extensions), set on return
  *
  * Return: pointer to L4 header, NULL if not found
  */
-char *ipv6_l4hdr(const struct pool *p, int idx, size_t offset, uint8_t *proto,
-		 size_t *dlen)
+bool ipv6_l4hdr(struct iov_tail *data, uint8_t *proto, size_t *dlen)
 {
 	const struct ipv6_opt_hdr *o;
+	struct ipv6_opt_hdr oc;
 	const struct ipv6hdr *ip6h;
-	char *base;
+	struct ipv6hdr ip6hc;
 	int hdrlen;
 	uint8_t nh;
 
-	base = packet_get(p, idx, 0, 0, NULL);
-	ip6h = packet_get(p, idx, offset, sizeof(*ip6h), dlen);
+	ip6h = IOV_REMOVE_HEADER(data, ip6hc);
 	if (!ip6h)
-		return NULL;
-
-	offset += sizeof(*ip6h);
+		return false;
+	*dlen = iov_tail_size(data);
 
 	nh = ip6h->nexthdr;
 	if (!IPV6_NH_OPT(nh))
 		goto found;
 
-	while ((o = packet_get_try(p, idx, offset, sizeof(*o), dlen))) {
+	while ((o = IOV_PEEK_HEADER(data, oc))) {
+		*dlen = iov_tail_size(data) - sizeof(*o);
 		nh = o->nexthdr;
 		hdrlen = (o->hdrlen + 1) * 8;
 
 		if (IPV6_NH_OPT(nh))
-			offset += hdrlen;
+			data->off += hdrlen;
 		else
 			goto found;
 	}
@@ -65,8 +62,8 @@ char *ipv6_l4hdr(const struct pool *p, int idx, size_t offset, uint8_t *proto,
 
 found:
 	if (nh == 59)
-		return NULL;
+		return false;
 
 	*proto = nh;
-	return base + offset;
+	return true;
 }
diff --git a/ip.h b/ip.h
index 471c57ee4c71..874cbe476a6b 100644
--- a/ip.h
+++ b/ip.h
@@ -115,8 +115,7 @@ static inline uint32_t ip6_get_flow_lbl(const struct ipv6hdr *ip6h)
 		ip6h->flow_lbl[2];
 }
 
-char *ipv6_l4hdr(const struct pool *p, int idx, size_t offset, uint8_t *proto,
-		 size_t *dlen);
+bool ipv6_l4hdr(struct iov_tail *data, uint8_t *proto, size_t *dlen);
 
 /* IPv6 link-local all-nodes multicast adddress, ff02::1 */
 static const struct in6_addr in6addr_ll_all_nodes = {
diff --git a/tap.c b/tap.c
index bb4e23df226e..280a04981954 100644
--- a/tap.c
+++ b/tap.c
@@ -888,8 +888,10 @@ resume:
 		if (plen != check)
 			continue;
 
-		if (!(l4h = ipv6_l4hdr(in, i, sizeof(*eh), &proto, &l4len)))
+		data = IOV_TAIL_FROM_BUF(ip6h, sizeof(*ip6h) + check, 0);
+		if (!ipv6_l4hdr(&data, &proto, &l4len))
 			continue;
+		l4h = (char *)data.iov[0].iov_base + data.off;
 
 		if (IN6_IS_ADDR_LOOPBACK(saddr) || IN6_IS_ADDR_LOOPBACK(daddr)) {
 			char sstr[INET6_ADDRSTRLEN], dstr[INET6_ADDRSTRLEN];
-- 
@@ -888,8 +888,10 @@ resume:
 		if (plen != check)
 			continue;
 
-		if (!(l4h = ipv6_l4hdr(in, i, sizeof(*eh), &proto, &l4len)))
+		data = IOV_TAIL_FROM_BUF(ip6h, sizeof(*ip6h) + check, 0);
+		if (!ipv6_l4hdr(&data, &proto, &l4len))
 			continue;
+		l4h = (char *)data.iov[0].iov_base + data.off;
 
 		if (IN6_IS_ADDR_LOOPBACK(saddr) || IN6_IS_ADDR_LOOPBACK(daddr)) {
 			char sstr[INET6_ADDRSTRLEN], dstr[INET6_ADDRSTRLEN];
-- 
2.49.0


^ permalink raw reply related	[flat|nested] 34+ messages in thread

* [PATCH 18/18] tap: Convert to iov_tail
  2025-04-02 17:23 [PATCH 00/18] Introduce discontiguous frames management Laurent Vivier
                   ` (16 preceding siblings ...)
  2025-04-02 17:23 ` [PATCH 17/18] ip: Use iov_tail in ipv6_l4hdr() Laurent Vivier
@ 2025-04-02 17:23 ` Laurent Vivier
  17 siblings, 0 replies; 34+ messages in thread
From: Laurent Vivier @ 2025-04-02 17:23 UTC (permalink / raw)
  To: passt-dev; +Cc: Laurent Vivier

Use packet_base() and extract headers using IOV_REMOVE_HEADER()
and IOV_PEEK_HEADER() rather than packet_get().

Remove packet_get() as it is not used anymore.

Signed-off-by: Laurent Vivier <lvivier@redhat.com>
---
 packet.c | 53 -----------------------------------------------------
 packet.h | 10 ----------
 tap.c    | 23 ++++++++++++++---------
 3 files changed, 14 insertions(+), 72 deletions(-)

diff --git a/packet.c b/packet.c
index 8066ac12502b..e917b0daad2a 100644
--- a/packet.c
+++ b/packet.c
@@ -103,59 +103,6 @@ void packet_add_do(struct pool *p, struct iov_tail *data,
 	p->count++;
 }
 
-/**
- * packet_get_do() - Get data range from packet descriptor from given pool
- * @p:		Packet pool
- * @idx:	Index of packet descriptor in pool
- * @offset:	Offset of data range in packet descriptor
- * @len:	Length of desired data range
- * @left:	Length of available data after range, set on return, can be NULL
- * @func:	For tracing: name of calling function, NULL means no trace()
- * @line:	For tracing: caller line of function call
- *
- * Return: pointer to start of data range, NULL on invalid range or descriptor
- */
-void *packet_get_do(const struct pool *p, size_t idx, size_t offset,
-		    size_t len, size_t *left, const char *func, int line)
-{
-	char *ptr;
-
-	if (idx >= p->size || idx >= p->count) {
-		if (func) {
-			trace("packet %zu from pool size: %zu, count: %zu, "
-			      "%s:%i", idx, p->size, p->count, func, line);
-		}
-		return NULL;
-	}
-
-	if (len > PACKET_MAX_LEN) {
-		if (func) {
-			trace("packet data length %zu, %s:%i",
-			      len, func, line);
-		}
-		return NULL;
-	}
-
-	if (len + offset > p->pkt[idx].iov_len) {
-		if (func) {
-			trace("data length %zu, offset %zu from length %zu, "
-			      "%s:%i", len, offset, p->pkt[idx].iov_len,
-			      func, line);
-		}
-		return NULL;
-	}
-
-	ptr = (char *)p->pkt[idx].iov_base + offset;
-
-	if (packet_check_range(p, ptr, len, func, line))
-		return NULL;
-
-	if (left)
-		*left = p->pkt[idx].iov_len - offset - len;
-
-	return ptr;
-}
-
 /**
  * packet_base_do() - Get data range from packet descriptor from given pool
  * @p:		Packet pool
diff --git a/packet.h b/packet.h
index 35058e747a43..787535a228a5 100644
--- a/packet.h
+++ b/packet.h
@@ -32,9 +32,6 @@ struct pool {
 int vu_packet_check_range(void *buf, const char *ptr, size_t len);
 void packet_add_do(struct pool *p, struct iov_tail *data,
 		   const char *func, int line);
-void *packet_get_do(const struct pool *p, const size_t idx,
-		    size_t offset, size_t len, size_t *left,
-		    const char *func, int line);
 bool packet_base_do(const struct pool *p, const size_t idx,
 		    struct iov_tail *data,
 		    const char *func, int line);
@@ -43,13 +40,6 @@ void pool_flush(struct pool *p);
 #define packet_add(p, data)					\
 	packet_add_do(p, data, __func__, __LINE__)
 
-#define packet_get(p, idx, offset, len, left)				\
-	packet_get_do(p, idx, offset, len, left, __func__, __LINE__)
-
-
-#define packet_get_try(p, idx, offset, len, left)			\
-	packet_get_do(p, idx, offset, len, left, NULL, 0)
-
 #define packet_base(p, idx, data)					\
 	packet_base_do(p, idx, data, __func__, __LINE__)
 
diff --git a/tap.c b/tap.c
index 280a04981954..d6a1da8481a2 100644
--- a/tap.c
+++ b/tap.c
@@ -870,17 +870,24 @@ resume:
 		const struct udphdr *uh;
 		struct iov_tail data;
 		struct ipv6hdr *ip6h;
+		struct ipv6hdr ip6hc;
+		struct ethhdr ehc;
+		struct udphdr uhc;
 		uint8_t proto;
-		char *l4h;
 
-		eh =   packet_get(in, i, 0,		sizeof(*eh), NULL);
+		if (!packet_base(in, i, &data))
+			return -1;
+
+		eh = IOV_REMOVE_HEADER(&data, ehc);
 		if (!eh)
 			continue;
 
-		ip6h = packet_get(in, i, sizeof(*eh),	sizeof(*ip6h), &check);
+		ip6h = IOV_PEEK_HEADER(&data, ip6hc);
 		if (!ip6h)
 			continue;
 
+		check = iov_tail_size(&data) - sizeof(*ip6h);
+
 		saddr = &ip6h->saddr;
 		daddr = &ip6h->daddr;
 
@@ -888,10 +895,8 @@ resume:
 		if (plen != check)
 			continue;
 
-		data = IOV_TAIL_FROM_BUF(ip6h, sizeof(*ip6h) + check, 0);
 		if (!ipv6_l4hdr(&data, &proto, &l4len))
 			continue;
-		l4h = (char *)data.iov[0].iov_base + data.off;
 
 		if (IN6_IS_ADDR_LOOPBACK(saddr) || IN6_IS_ADDR_LOOPBACK(daddr)) {
 			char sstr[INET6_ADDRSTRLEN], dstr[INET6_ADDRSTRLEN];
@@ -916,6 +921,8 @@ resume:
 		}
 
 		if (proto == IPPROTO_ICMPV6) {
+			const struct icmp6hdr *l4h;
+			struct icmp6hdr l4hc;
 			PACKET_POOL_P(pkt, 1, in->buf, in->buf_size);
 
 			if (c->no_icmp)
@@ -924,9 +931,9 @@ resume:
 			if (l4len < sizeof(struct icmp6hdr))
 				continue;
 
-			data = IOV_TAIL_FROM_BUF(l4h, l4len, 0);
 			packet_add(pkt, &data);
 
+			l4h = IOV_PEEK_HEADER(&data, l4hc);
 			if (ndp(c, (struct icmp6hdr *)l4h, saddr, pkt))
 				continue;
 
@@ -939,12 +946,11 @@ resume:
 
 		if (l4len < sizeof(*uh))
 			continue;
-		uh = (struct udphdr *)l4h;
+		uh = IOV_PEEK_HEADER(&data, uhc);
 
 		if (proto == IPPROTO_UDP) {
 			PACKET_POOL_P(pkt, 1, in->buf, in->buf_size);
 
-			data = IOV_TAIL_FROM_BUF(l4h, l4len, 0);
 			packet_add(pkt, &data);
 
 			if (dhcpv6(c, pkt, saddr, daddr))
@@ -999,7 +1005,6 @@ resume:
 #undef L4_SET
 
 append:
-		data = IOV_TAIL_FROM_BUF(l4h, l4len, 0);
 		packet_add((struct pool *)&seq->p, &data);
 	}
 
-- 
@@ -870,17 +870,24 @@ resume:
 		const struct udphdr *uh;
 		struct iov_tail data;
 		struct ipv6hdr *ip6h;
+		struct ipv6hdr ip6hc;
+		struct ethhdr ehc;
+		struct udphdr uhc;
 		uint8_t proto;
-		char *l4h;
 
-		eh =   packet_get(in, i, 0,		sizeof(*eh), NULL);
+		if (!packet_base(in, i, &data))
+			return -1;
+
+		eh = IOV_REMOVE_HEADER(&data, ehc);
 		if (!eh)
 			continue;
 
-		ip6h = packet_get(in, i, sizeof(*eh),	sizeof(*ip6h), &check);
+		ip6h = IOV_PEEK_HEADER(&data, ip6hc);
 		if (!ip6h)
 			continue;
 
+		check = iov_tail_size(&data) - sizeof(*ip6h);
+
 		saddr = &ip6h->saddr;
 		daddr = &ip6h->daddr;
 
@@ -888,10 +895,8 @@ resume:
 		if (plen != check)
 			continue;
 
-		data = IOV_TAIL_FROM_BUF(ip6h, sizeof(*ip6h) + check, 0);
 		if (!ipv6_l4hdr(&data, &proto, &l4len))
 			continue;
-		l4h = (char *)data.iov[0].iov_base + data.off;
 
 		if (IN6_IS_ADDR_LOOPBACK(saddr) || IN6_IS_ADDR_LOOPBACK(daddr)) {
 			char sstr[INET6_ADDRSTRLEN], dstr[INET6_ADDRSTRLEN];
@@ -916,6 +921,8 @@ resume:
 		}
 
 		if (proto == IPPROTO_ICMPV6) {
+			const struct icmp6hdr *l4h;
+			struct icmp6hdr l4hc;
 			PACKET_POOL_P(pkt, 1, in->buf, in->buf_size);
 
 			if (c->no_icmp)
@@ -924,9 +931,9 @@ resume:
 			if (l4len < sizeof(struct icmp6hdr))
 				continue;
 
-			data = IOV_TAIL_FROM_BUF(l4h, l4len, 0);
 			packet_add(pkt, &data);
 
+			l4h = IOV_PEEK_HEADER(&data, l4hc);
 			if (ndp(c, (struct icmp6hdr *)l4h, saddr, pkt))
 				continue;
 
@@ -939,12 +946,11 @@ resume:
 
 		if (l4len < sizeof(*uh))
 			continue;
-		uh = (struct udphdr *)l4h;
+		uh = IOV_PEEK_HEADER(&data, uhc);
 
 		if (proto == IPPROTO_UDP) {
 			PACKET_POOL_P(pkt, 1, in->buf, in->buf_size);
 
-			data = IOV_TAIL_FROM_BUF(l4h, l4len, 0);
 			packet_add(pkt, &data);
 
 			if (dhcpv6(c, pkt, saddr, daddr))
@@ -999,7 +1005,6 @@ resume:
 #undef L4_SET
 
 append:
-		data = IOV_TAIL_FROM_BUF(l4h, l4len, 0);
 		packet_add((struct pool *)&seq->p, &data);
 	}
 
-- 
2.49.0


^ permalink raw reply related	[flat|nested] 34+ messages in thread

* Re: [PATCH 01/18] arp: Don't mix incoming and outgoing buffers
  2025-04-02 17:23 ` [PATCH 01/18] arp: Don't mix incoming and outgoing buffers Laurent Vivier
@ 2025-04-03  1:57   ` David Gibson
  0 siblings, 0 replies; 34+ messages in thread
From: David Gibson @ 2025-04-03  1:57 UTC (permalink / raw)
  To: Laurent Vivier; +Cc: passt-dev

[-- Attachment #1: Type: text/plain, Size: 4616 bytes --]

On Wed, Apr 02, 2025 at 07:23:26PM +0200, Laurent Vivier wrote:
> Don't use the memory of the incoming packet to build the outgoing buffer
> as it can be memory of the TX queue in the case of vhost-user.
> 
> Moreover with vhost-user, the packet can be splitted accross several
> iovec and it's easier to rebuild it in a buffer than updating an
> existing iovec array.
> 
> Signed-off-by: Laurent Vivier <lvivier@redhat.com>

Reviewed-by: David Gibson <david@gibson.dropbear.id.au>

> ---
>  arp.c | 84 ++++++++++++++++++++++++++++++++++++++---------------------
>  1 file changed, 55 insertions(+), 29 deletions(-)
> 
> diff --git a/arp.c b/arp.c
> index fc482bbd9938..9d68d7c3b602 100644
> --- a/arp.c
> +++ b/arp.c
> @@ -31,56 +31,82 @@
>  #include "tap.h"
>  
>  /**
> - * arp() - Check if this is a supported ARP message, reply as needed
> + * ignore_arp() - Check if this is a supported ARP message
>   * @c:		Execution context
> - * @p:		Packet pool, single packet with Ethernet buffer
> + * @ah:		ARP header
> + * @am:		ARP message
>   *
> - * Return: 1 if handled, -1 on failure
> + * Return: true if the message is supported, false otherwise.
>   */
> -int arp(const struct ctx *c, const struct pool *p)
> +static bool ignore_arp(const struct ctx *c,
> +		       const struct arphdr *ah, const struct arpmsg *am)
>  {
> -	unsigned char swap[4];
> -	struct ethhdr *eh;
> -	struct arphdr *ah;
> -	struct arpmsg *am;
> -	size_t l2len;
> -
> -	eh = packet_get(p, 0, 0,			 sizeof(*eh), NULL);
> -	ah = packet_get(p, 0, sizeof(*eh),		 sizeof(*ah), NULL);
> -	am = packet_get(p, 0, sizeof(*eh) + sizeof(*ah), sizeof(*am), NULL);
> -
> -	if (!eh || !ah || !am)
> -		return -1;
> -
>  	if (ah->ar_hrd != htons(ARPHRD_ETHER)	||
>  	    ah->ar_pro != htons(ETH_P_IP)	||
>  	    ah->ar_hln != ETH_ALEN		||
>  	    ah->ar_pln != 4			||
>  	    ah->ar_op  != htons(ARPOP_REQUEST))
> -		return 1;
> +		return true;
>  
>  	/* Discard announcements, but not 0.0.0.0 "probes" */
>  	if (memcmp(am->sip, &in4addr_any, sizeof(am->sip)) &&
>  	    !memcmp(am->sip, am->tip, sizeof(am->sip)))
> -		return 1;
> +		return true;
>  
>  	/* Don't resolve the guest's assigned address, either. */
>  	if (!memcmp(am->tip, &c->ip4.addr, sizeof(am->tip)))
> +		return true;
> +
> +	return false;
> +}
> +
> +/**
> + * arp() - Check if this is a supported ARP message, reply as needed
> + * @c:		Execution context
> + * @p:		Packet pool, single packet with Ethernet buffer
> + *
> + * Return: 1 if handled, -1 on failure
> + */
> +int arp(const struct ctx *c, const struct pool *p)
> +{
> +	struct {
> +		struct ethhdr eh;
> +		struct arphdr ah;
> +		struct arpmsg am;
> +	} __attribute__((__packed__)) resp;
> +	const struct ethhdr *eh;
> +	const struct arphdr *ah;
> +	const struct arpmsg *am;
> +
> +	eh = packet_get(p, 0, 0,			 sizeof(*eh), NULL);
> +	ah = packet_get(p, 0, sizeof(*eh),		 sizeof(*ah), NULL);
> +	am = packet_get(p, 0, sizeof(*eh) + sizeof(*ah), sizeof(*am), NULL);
> +
> +	if (!eh || !ah || !am)
> +		return -1;
> +
> +	if (ignore_arp(c, ah, am))
>  		return 1;
>  
> -	ah->ar_op = htons(ARPOP_REPLY);
> -	memcpy(am->tha,		am->sha,	sizeof(am->tha));
> -	memcpy(am->sha,		c->our_tap_mac,	sizeof(am->sha));
> +	/* ethernet header */
> +	resp.eh.h_proto = htons(ETH_P_ARP);
> +	memcpy(resp.eh.h_dest, eh->h_source, sizeof(resp.eh.h_dest));
> +	memcpy(resp.eh.h_source, c->our_tap_mac, sizeof(resp.eh.h_source));
>  
> -	memcpy(swap,		am->tip,	sizeof(am->tip));
> -	memcpy(am->tip,		am->sip,	sizeof(am->tip));
> -	memcpy(am->sip,		swap,		sizeof(am->sip));
> +	/* ARP header */
> +	resp.ah.ar_op = htons(ARPOP_REPLY);
> +	resp.ah.ar_hrd = ah->ar_hrd;
> +	resp.ah.ar_pro = ah->ar_pro;
> +	resp.ah.ar_hln = ah->ar_hln;
> +	resp.ah.ar_pln = ah->ar_pln;
>  
> -	l2len = sizeof(*eh) + sizeof(*ah) + sizeof(*am);
> -	memcpy(eh->h_dest,	eh->h_source,	sizeof(eh->h_dest));
> -	memcpy(eh->h_source,	c->our_tap_mac,	sizeof(eh->h_source));
> +	/* ARP message */
> +	memcpy(resp.am.sha,		c->our_tap_mac,	sizeof(resp.am.sha));
> +	memcpy(resp.am.sip,		am->tip,	sizeof(resp.am.sip));
> +	memcpy(resp.am.tha,		am->sha,	sizeof(resp.am.tha));
> +	memcpy(resp.am.tip,		am->sip,	sizeof(resp.am.tip));
>  
> -	tap_send_single(c, eh, l2len);
> +	tap_send_single(c, &resp, sizeof(resp));
>  
>  	return 1;
>  }

-- 
David Gibson (he or they)	| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you, not the other way
				| around.
http://www.ozlabs.org/~dgibson

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH 02/18] iov: Update IOV_REMOVE_HEADER() and IOV_PEEK_HEADER()
  2025-04-02 17:23 ` [PATCH 02/18] iov: Update IOV_REMOVE_HEADER() and IOV_PEEK_HEADER() Laurent Vivier
@ 2025-04-03  2:36   ` David Gibson
  0 siblings, 0 replies; 34+ messages in thread
From: David Gibson @ 2025-04-03  2:36 UTC (permalink / raw)
  To: Laurent Vivier; +Cc: passt-dev

[-- Attachment #1: Type: text/plain, Size: 13921 bytes --]

On Wed, Apr 02, 2025 at 07:23:27PM +0200, Laurent Vivier wrote:
> Provide a temporary variable of the wanted type to store
> the header if the memory in the iovec array is not contiguous.
> 
> Also introduce iov_tail_ptr(), iov_drop() and iov_copy()
> for later use.

Not sure if these belong in the same patch.

> Signed-off-by: Laurent Vivier <lvivier@redhat.com>
> ---
>  iov.c     | 133 ++++++++++++++++++++++++++++++++++++++++++++++++++----
>  iov.h     |  53 ++++++++++++++++------
>  tcp_buf.c |   2 +-
>  3 files changed, 163 insertions(+), 25 deletions(-)
> 
> diff --git a/iov.c b/iov.c
> index 8c63b7ea6e31..d96fc2ab594b 100644
> --- a/iov.c
> +++ b/iov.c
> @@ -108,7 +108,7 @@ size_t iov_from_buf(const struct iovec *iov, size_t iov_cnt,
>   *
>   * Returns:    The number of bytes successfully copied.
>   */
> -/* cppcheck-suppress unusedFunction */
> +/* cppcheck-suppress [staticFunction,unmatchedSuppression] */
>  size_t iov_to_buf(const struct iovec *iov, size_t iov_cnt,
>  		  size_t offset, void *buf, size_t bytes)
>  {
> @@ -126,6 +126,7 @@ size_t iov_to_buf(const struct iovec *iov, size_t iov_cnt,
>  	/* copying data */
>  	for (copied = 0; copied < bytes && i < iov_cnt; i++) {
>  		size_t len = MIN(iov[i].iov_len - offset, bytes - copied);
> +		/* NOLINTNEXTLINE(clang-analyzer-core.NonNullParamChecker) */
>  		memcpy((char *)buf + copied, (char *)iov[i].iov_base + offset,
>  		       len);
>  		copied += len;
> @@ -156,6 +157,45 @@ size_t iov_size(const struct iovec *iov, size_t iov_cnt)
>  	return len;
>  }
>  
> +/**
> + * iov_copy - Copy data from one scatter/gather I/O vector (struct iovec) to
> + *	      another.

I think this function name is slightly misleading and the description
is very misleading.  What the function does is make a new iov
referencing a subset of the data in the original iov.  It doesn't copy
the reference data - the dst iov will alias (some of) the same memory
as the src iov.

I'd suggest iov_slice() maybe?

> + * @dst_iov:     Pointer to the destination array of struct iovec describing
> + *		 the scatter/gather I/O vector to copy to.
> + * @dst_iov_cnt: Number of elements in the destination iov array.

Maybe worth emphasising that this is a maximum, we might not use all of them.

> + * @iov:	 Pointer to the source array of struct iovec describing
> + *		 the scatter/gather I/O vector to copy from.
> + * @iov_cnt:     Number of elements in the source iov array.
> + * @offset:      Offset within the source iov from where copying should start.
> + * @bytes:	 Total number of bytes to copy from iov to dst_iov.
> + *
> + * Returns:      The number of elements successfully copied to the destination
> + *		 iov array.

So, this is obviously useful, because it's effectively the "after"
value of dst_iov.  However, it doesn't necessarily distinguish the
(AFAICT) three exit conditions:
  1) "success"; iov_size(dst_iov) == bytes afterwards
  2) iov_size(dst_iov) < bytes, because we ran out of data in the
     source iov
  3) iov_size(dst_iov) < bytes, because we needed more than
     @dst_iov_cnt "slots" to reference that much of @iov.

On "failure", whether it's (2) or (3) seems like it could matter, so
it would be nice if we can deduce that from the return value.

> + */
> +/* cppcheck-suppress unusedFunction */
> +unsigned iov_copy(struct iovec *dst_iov, size_t dst_iov_cnt,
> +		  const struct iovec *iov, size_t iov_cnt,
> +		  size_t offset, size_t bytes)
> +{
> +	unsigned int i, j;
> +
> +	i = iov_skip_bytes(iov, iov_cnt, offset, &offset);
> +
> +	/* copying data */
> +	for (j = 0; i < iov_cnt && j < dst_iov_cnt && bytes; i++) {
> +		size_t len = MIN(bytes, iov[i].iov_len - offset);
> +
> +		dst_iov[j].iov_base = (char *)iov[i].iov_base + offset;
> +		dst_iov[j].iov_len = len;
> +		j++;
> +		bytes -= len;
> +		offset = 0;
> +	}
> +
> +	return j;
> +}
> +
>  /**
>   * iov_tail_prune() - Remove any unneeded buffers from an IOV tail
>   * @tail:	IO vector tail (modified)
> @@ -192,7 +232,50 @@ size_t iov_tail_size(struct iov_tail *tail)
>  }
>  
>  /**
> - * iov_peek_header_() - Get pointer to a header from an IOV tail
> + * iov_drop() - update head of the tail

Cute phrasing, but kind of confusing.  I think this wants similar
phrasing to remove_header, since it's doing basically the same thing,
but discarding rather than retreiving the data.

> + * @tail:	IO vector tail
> + * @len:	length to move the head of the tail
> + *
> + * Returns:	true if the tail still contains any bytes, otherwise false
> + */
> +/* cppcheck-suppress unusedFunction */
> +bool iov_drop(struct iov_tail *tail, size_t len)
> +{
> +	tail->off = tail->off + len;
> +
> +	return iov_tail_prune(tail);
> +}
> +
> +/**
> + * iov_tail_ptr() - get a pointer to the head of the tail
> + * @tail:	IO vector tail
> + * @off:	Byte offset in the tail to skip
> + * @len:	Length of the data to get, in bytes
> + *
> + * Returns:	pointer to the data in the tail, NULL if the
> + *		tail doesn't contains @len bytes.

This is a dangerous interface, because it returns a pointer with no
indication of how many bytes after it are actually valid.  I can see
it being useful internally, but I don't think it should be exported.

> + */
> +/* cppcheck-suppress unusedFunction */
> +void *iov_tail_ptr(struct iov_tail *tail, size_t off, size_t len)
> +{
> +	const struct iovec *iov;
> +	size_t cnt, i;
> +
> +	i = iov_skip_bytes(tail->iov, tail->cnt, tail->off + off, &off);
> +	if (i == tail->cnt)
> +		return NULL;
> +
> +	iov = &tail->iov[i];
> +	cnt = tail->cnt - i;
> +
> +	if (iov_size(iov, cnt) < off + len)
> +		return NULL;
> +
> +	return (char *)iov[0].iov_base + off;
> +}
> +
> +/**
> + * iov_check_header - Check if a header can be accessed
>   * @tail:	IOV tail to get header from
>   * @len:	Length of header to get, in bytes
>   * @align:	Required alignment of header, in bytes
> @@ -203,8 +286,7 @@ size_t iov_tail_size(struct iov_tail *tail)
>   *	    overruns the IO vector, is not contiguous or doesn't have the
>   *	    requested alignment.
>   */
> -/* cppcheck-suppress [staticFunction,unmatchedSuppression] */
> -void *iov_peek_header_(struct iov_tail *tail, size_t len, size_t align)
> +static void *iov_check_header(struct iov_tail *tail, size_t len, size_t align)
>  {
>  	char *p;
>  
> @@ -225,7 +307,36 @@ void *iov_peek_header_(struct iov_tail *tail, size_t len, size_t align)
>  }
>  
>  /**
> - * iov_remove_header_() - Remove a header from an IOV tail
> + * iov_peek_header_ - Get pointer to a header from an IOV tail
> + * @tail:	IOV tail to get header from

Needs a description for @v.

> + * @len:	Length of header to get, in bytes
> + * @align:	Required alignment of header, in bytes
> + *
> + * @tail may be pruned, but will represent the same bytes as before.
> + *
> + * Returns: Pointer to the first @len logical bytes of the tail, or to
> + *          a copy if that overruns the IO vector, is not contiguous or
> + *          doesn't have the requested alignment. NULL if that overruns the
> + *          IO vector.
> + */
> +/* cppcheck-suppress [staticFunction,unmatchedSuppression] */
> +void *iov_peek_header_(struct iov_tail *tail, void *v, size_t len, size_t align)
> +{
> +	char *p = iov_check_header(tail, len, align);
> +	size_t l;
> +
> +	if (p)
> +		return p;
> +
> +	l = iov_to_buf(tail->iov, tail->cnt,  tail->off, v, len);
> +	if (l != len)
> +		return NULL;
> +
> +	return v;
> +}
> +
> +/**
> + * iov_remove_header_ - Remove a header from an IOV tail
>   * @tail:	IOV tail to remove header from (modified)

Needs description of @v.

>   * @len:	Length of header to remove, in bytes
>   * @align:	Required alignment of header, in bytes
> @@ -233,17 +344,19 @@ void *iov_peek_header_(struct iov_tail *tail, size_t len, size_t align)
>   * On success, @tail is updated so that it longer includes the bytes of the
>   * returned header.
>   *
> - * Returns: Pointer to the first @len logical bytes of the tail, NULL if that
> - *	    overruns the IO vector, is not contiguous or doesn't have the
> - *	    requested alignment.
> + * Returns: Pointer to the first @len logical bytes of the tail, or to
> + *          a copy if that overruns the IO vector, is not contiguous or
> + *          doesn't have the requested alignment. NULL if that overruns the
> + *          IO vector.
>   */
> -void *iov_remove_header_(struct iov_tail *tail, size_t len, size_t align)
> +void *iov_remove_header_(struct iov_tail *tail, void *v, size_t len, size_t align)
>  {
> -	char *p = iov_peek_header_(tail, len, align);
> +	char *p = iov_peek_header_(tail, v, len, align);
>  
>  	if (!p)
>  		return NULL;
>  
>  	tail->off = tail->off + len;
> +
>  	return p;
>  }
> diff --git a/iov.h b/iov.h
> index 9855bf0c0c32..f9b5fdf0803e 100644
> --- a/iov.h
> +++ b/iov.h
> @@ -28,6 +28,9 @@ size_t iov_from_buf(const struct iovec *iov, size_t iov_cnt,
>  size_t iov_to_buf(const struct iovec *iov, size_t iov_cnt,
>                    size_t offset, void *buf, size_t bytes);
>  size_t iov_size(const struct iovec *iov, size_t iov_cnt);
> +unsigned iov_copy(struct iovec *dst_iov, size_t dst_iov_cnt,
> +                  const struct iovec *iov, size_t iov_cnt,
> +                  size_t offset, size_t bytes);
>  
>  /*
>   * DOC: Theory of Operation, struct iov_tail
> @@ -70,38 +73,60 @@ struct iov_tail {
>  #define IOV_TAIL(iov_, cnt_, off_) \
>  	(struct iov_tail){ .iov = (iov_), .cnt = (cnt_), .off = (off_) }
>  
> +/**
> + * IOV_TAIL_FROM_BUF() - Create a new IOV tail from a buffer
> + * @buf_:	Buffer address to use in the iovec
> + * @len_:	Buffer size
> + * @off_:	Byte offset in the buffer where the tail begins
> + */
> +#define IOV_TAIL_FROM_BUF(buf_, len_, off_) \
> +	IOV_TAIL((&(const struct iovec){ .iov_base = (buf_), .iov_len = (len_) }), 1, (off_))
> +
>  bool iov_tail_prune(struct iov_tail *tail);
>  size_t iov_tail_size(struct iov_tail *tail);
> -void *iov_peek_header_(struct iov_tail *tail, size_t len, size_t align);
> -void *iov_remove_header_(struct iov_tail *tail, size_t len, size_t align);
> +bool iov_drop(struct iov_tail *tail, size_t len);
> +void *iov_peek_header_(struct iov_tail *tail, void *v, size_t len, size_t align);
> +void *iov_remove_header_(struct iov_tail *tail, void *v, size_t len, size_t align);
> +void *iov_tail_ptr(struct iov_tail *tail, size_t off, size_t len);
>  
>  /**
>   * IOV_PEEK_HEADER() - Get typed pointer to a header from an IOV tail
>   * @tail_:	IOV tail to get header from
> - * @type_:	Data type of the header
> + * @var_:	Temporary buffer of the type of the header to use if
> + *		the memory in the iovec array is not contiguous.
>   *
>   * @tail_ may be pruned, but will represent the same bytes as before.
>   *
> - * Returns: Pointer of type (@type_ *) located at the start of @tail_, NULL if
> - *          we can't get a contiguous and aligned pointer.
> + * Returns: Pointer of type (@type_ *) located at the start of @tail_

This is no longer quite accurate: the data is the same, but it's not
necessarily a pointer into @tail_ proper.  There's also still a NULL
return case, just more limited than it was before.

>   */
> -#define IOV_PEEK_HEADER(tail_, type_)					\
> -	((type_ *)(iov_peek_header_((tail_),				\
> -				    sizeof(type_), __alignof__(type_))))
> +
> +#define IOV_PEEK_HEADER(tail_, var_)					\
> +	((__typeof__(var_) *)(iov_peek_header_((tail_), &(var_),	\
> +				      sizeof(var_), __alignof__(var_))))
>  
>  /**
>   * IOV_REMOVE_HEADER() - Remove and return typed header from an IOV tail
>   * @tail_:	IOV tail to remove header from (modified)
> - * @type_:	Data type of the header to remove
> + * @var_:	Temporary buffer of the type of the header to use if
> + *		the memory in the iovec array is not contiguous.
>   *
>   * On success, @tail_ is updated so that it longer includes the bytes of the
>   * returned header.
>   *
> - * Returns: Pointer of type (@type_ *) located at the old start of @tail_, NULL
> - *          if we can't get a contiguous and aligned pointer.
> + * Returns: Pointer of type (@type_ *) located at the old start of @tail_
> + */
> +
> +#define IOV_REMOVE_HEADER(tail_, var_)					\
> +	((__typeof__(var_) *)(iov_remove_header_((tail_), &(var_),	\
> +				    sizeof(var_), __alignof__(var_))))
> +
> +/** IOV_DROP_HEADER() -- Remove a typed header from an IOV tail
> + * @tail_:	IOV tail to remove header from (modified)
> + * @type_:	Data type of the header to remove
> + *
> + * Returns:	true if the tail still contains any bytes, otherwise false
>   */
> -#define IOV_REMOVE_HEADER(tail_, type_)					\
> -	((type_ *)(iov_remove_header_((tail_),				\
> -				      sizeof(type_), __alignof__(type_))))
> +#define IOV_DROP_HEADER(tail_, type_)					\
> +	iov_drop((tail_), sizeof(type_))
>  
>  #endif /* IOVEC_H */
> diff --git a/tcp_buf.c b/tcp_buf.c
> index 05305636b503..4bcc1acb245a 100644
> --- a/tcp_buf.c
> +++ b/tcp_buf.c
> @@ -160,7 +160,7 @@ static void tcp_l2_buf_fill_headers(const struct tcp_tap_conn *conn,
>  				    uint32_t seq, bool no_tcp_csum)
>  {
>  	struct iov_tail tail = IOV_TAIL(&iov[TCP_IOV_PAYLOAD], 1, 0);
> -	struct tcphdr *th = IOV_REMOVE_HEADER(&tail, struct tcphdr);
> +	struct tcphdr thc, *th = IOV_REMOVE_HEADER(&tail, thc);
>  	struct tap_hdr *taph = iov[TCP_IOV_TAP].iov_base;
>  	const struct flowside *tapside = TAPFLOW(conn);
>  	const struct in_addr *a4 = inany_v4(&tapside->oaddr);

-- 
David Gibson (he or they)	| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you, not the other way
				| around.
http://www.ozlabs.org/~dgibson

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH 03/18] tap: Use iov_tail with tap_add_packet()
  2025-04-02 17:23 ` [PATCH 03/18] tap: Use iov_tail with tap_add_packet() Laurent Vivier
@ 2025-04-03  4:50   ` David Gibson
  0 siblings, 0 replies; 34+ messages in thread
From: David Gibson @ 2025-04-03  4:50 UTC (permalink / raw)
  To: Laurent Vivier; +Cc: passt-dev

[-- Attachment #1: Type: text/plain, Size: 7011 bytes --]

On Wed, Apr 02, 2025 at 07:23:28PM +0200, Laurent Vivier wrote:
> Use IOV_PEEK_HEADER() to get the ethernet header from the iovec.
> 
> Move the workaround about multiple iovec array from vu_handle_tx() to
> tap_add_packet(). Removing the offset out of the iovec array should
> reduce the iovec count to 1.
> 
> Signed-off-by: Laurent Vivier <lvivier@redhat.com>

Reviewed-by: David Gibson <david@gibson.dropbear.id.au>

> ---
>  iov.c       |  1 -
>  pcap.c      |  1 +
>  tap.c       | 30 +++++++++++++++++++++---------
>  tap.h       |  2 +-
>  vu_common.c | 25 +++++--------------------
>  5 files changed, 28 insertions(+), 31 deletions(-)
> 
> diff --git a/iov.c b/iov.c
> index d96fc2ab594b..508fb6da91fb 100644
> --- a/iov.c
> +++ b/iov.c
> @@ -238,7 +238,6 @@ size_t iov_tail_size(struct iov_tail *tail)
>   *
>   * Returns:	true if the tail still contains any bytes, otherwise false
>   */
> -/* cppcheck-suppress unusedFunction */
>  bool iov_drop(struct iov_tail *tail, size_t len)
>  {
>  	tail->off = tail->off + len;
> diff --git a/pcap.c b/pcap.c
> index e95aa6fe29a6..404043a27e22 100644
> --- a/pcap.c
> +++ b/pcap.c
> @@ -76,6 +76,7 @@ static void pcap_frame(const struct iovec *iov, size_t iovcnt,
>   * @pkt:	Pointer to data buffer, including L2 headers
>   * @l2len:	L2 frame length
>   */
> +/* cppcheck-suppress unusedFunction */
>  void pcap(const char *pkt, size_t l2len)
>  {
>  	struct iovec iov = { (char *)pkt, l2len };
> diff --git a/tap.c b/tap.c
> index 182a1151f139..ab3effe80f89 100644
> --- a/tap.c
> +++ b/tap.c
> @@ -1040,29 +1040,36 @@ void tap_handler(struct ctx *c, const struct timespec *now)
>  /**
>   * tap_add_packet() - Queue/capture packet, update notion of guest MAC address
>   * @c:		Execution context
> - * @l2len:	Total L2 packet length
> - * @p:		Packet buffer
> + * @data:	Packet to add to the pool
>   */
> -void tap_add_packet(struct ctx *c, ssize_t l2len, char *p)
> +void tap_add_packet(struct ctx *c, struct iov_tail *data)
>  {
>  	const struct ethhdr *eh;
> +	struct ethhdr ehc;
>  
> -	pcap(p, l2len);
> +	pcap_iov(data->iov, data->cnt, data->off);
>  
> -	eh = (struct ethhdr *)p;
> +	eh = IOV_PEEK_HEADER(data, ehc);
> +	if (!eh)
> +		return;
>  
>  	if (memcmp(c->guest_mac, eh->h_source, ETH_ALEN)) {
>  		memcpy(c->guest_mac, eh->h_source, ETH_ALEN);
>  		proto_update_l2_buf(c->guest_mac, NULL);
>  	}
>  
> +	iov_tail_prune(data);
> +	ASSERT(data->cnt == 1); /* packet_add() doesn't support iovec */
> +
>  	switch (ntohs(eh->h_proto)) {
>  	case ETH_P_ARP:
>  	case ETH_P_IP:
> -		packet_add(pool_tap4, l2len, p);
> +		packet_add(pool_tap4, data->iov[0].iov_len - data->off,
> +			   (char *)data->iov[0].iov_base + data->off);
>  		break;
>  	case ETH_P_IPV6:
> -		packet_add(pool_tap6, l2len, p);
> +		packet_add(pool_tap6, data->iov[0].iov_len - data->off,
> +			   (char *)data->iov[0].iov_base + data->off);
>  		break;
>  	default:
>  		break;
> @@ -1128,6 +1135,7 @@ static void tap_passt_input(struct ctx *c, const struct timespec *now)
>  
>  	while (n >= (ssize_t)sizeof(uint32_t)) {
>  		uint32_t l2len = ntohl_unaligned(p);
> +		struct iov_tail data;
>  
>  		if (l2len < sizeof(struct ethhdr) || l2len > L2_MAX_LEN_PASST) {
>  			err("Bad frame size from guest, resetting connection");
> @@ -1142,7 +1150,8 @@ static void tap_passt_input(struct ctx *c, const struct timespec *now)
>  		p += sizeof(uint32_t);
>  		n -= sizeof(uint32_t);
>  
> -		tap_add_packet(c, l2len, p);
> +		data = IOV_TAIL_FROM_BUF(p, l2len, 0);
> +		tap_add_packet(c, &data);
>  
>  		p += l2len;
>  		n -= l2len;
> @@ -1186,6 +1195,8 @@ static void tap_pasta_input(struct ctx *c, const struct timespec *now)
>  	for (n = 0;
>  	     n <= (ssize_t)(sizeof(pkt_buf) - L2_MAX_LEN_PASTA);
>  	     n += len) {
> +		struct iov_tail data;
> +
>  		len = read(c->fd_tap, pkt_buf + n, L2_MAX_LEN_PASTA);
>  
>  		if (len == 0) {
> @@ -1207,7 +1218,8 @@ static void tap_pasta_input(struct ctx *c, const struct timespec *now)
>  		    len > (ssize_t)L2_MAX_LEN_PASTA)
>  			continue;
>  
> -		tap_add_packet(c, len, pkt_buf + n);
> +		data = IOV_TAIL_FROM_BUF(pkt_buf + n, len, 0);
> +		tap_add_packet(c, &data);
>  	}
>  
>  	tap_handler(c, now);
> diff --git a/tap.h b/tap.h
> index dd39fd896f4a..5034acd8ac46 100644
> --- a/tap.h
> +++ b/tap.h
> @@ -119,6 +119,6 @@ void tap_sock_update_pool(void *base, size_t size);
>  void tap_backend_init(struct ctx *c);
>  void tap_flush_pools(void);
>  void tap_handler(struct ctx *c, const struct timespec *now);
> -void tap_add_packet(struct ctx *c, ssize_t l2len, char *p);
> +void tap_add_packet(struct ctx *c, struct iov_tail *data);
>  
>  #endif /* TAP_H */
> diff --git a/vu_common.c b/vu_common.c
> index 686a09b28c8e..e446fc4f2054 100644
> --- a/vu_common.c
> +++ b/vu_common.c
> @@ -159,7 +159,6 @@ static void vu_handle_tx(struct vu_dev *vdev, int index,
>  	struct vu_virtq_element elem[VIRTQUEUE_MAX_SIZE];
>  	struct iovec out_sg[VIRTQUEUE_MAX_SIZE];
>  	struct vu_virtq *vq = &vdev->vq[index];
> -	int hdrlen = sizeof(struct virtio_net_hdr_mrg_rxbuf);
>  	int out_sg_count;
>  	int count;
>  
> @@ -172,6 +171,7 @@ static void vu_handle_tx(struct vu_dev *vdev, int index,
>  	while (count < VIRTQUEUE_MAX_SIZE &&
>  	       out_sg_count + VU_MAX_TX_BUFFER_NB <= VIRTQUEUE_MAX_SIZE) {
>  		int ret;
> +		struct iov_tail data;
>  
>  		elem[count].out_num = VU_MAX_TX_BUFFER_NB;
>  		elem[count].out_sg = &out_sg[out_sg_count];
> @@ -187,25 +187,10 @@ static void vu_handle_tx(struct vu_dev *vdev, int index,
>  			warn("virtio-net transmit queue contains no out buffers");
>  			break;
>  		}
> -		if (elem[count].out_num == 1) {
> -			tap_add_packet(vdev->context,
> -				       elem[count].out_sg[0].iov_len - hdrlen,
> -				       (char *)elem[count].out_sg[0].iov_base +
> -				        hdrlen);
> -		} else {
> -			/* vnet header can be in a separate iovec */
> -			if (elem[count].out_num != 2) {
> -				debug("virtio-net transmit queue contains more than one buffer ([%d]: %u)",
> -				      count, elem[count].out_num);
> -			} else if (elem[count].out_sg[0].iov_len != (size_t)hdrlen) {
> -				debug("virtio-net transmit queue entry not aligned on hdrlen ([%d]: %d != %zu)",
> -				      count, hdrlen, elem[count].out_sg[0].iov_len);
> -			} else {
> -				tap_add_packet(vdev->context,
> -					       elem[count].out_sg[1].iov_len,
> -					       (char *)elem[count].out_sg[1].iov_base);
> -			}
> -		}
> +
> +		data = IOV_TAIL(elem[count].out_sg, elem[count].out_num, 0);
> +		if (IOV_DROP_HEADER(&data, struct virtio_net_hdr_mrg_rxbuf))
> +			tap_add_packet(vdev->context, &data);
>  
>  		count++;
>  	}

-- 
David Gibson (he or they)	| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you, not the other way
				| around.
http://www.ozlabs.org/~dgibson

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH 04/18] packet: Use iov_tail with packet_add()
  2025-04-02 17:23 ` [PATCH 04/18] packet: Use iov_tail with packet_add() Laurent Vivier
@ 2025-04-03  4:54   ` David Gibson
  0 siblings, 0 replies; 34+ messages in thread
From: David Gibson @ 2025-04-03  4:54 UTC (permalink / raw)
  To: Laurent Vivier; +Cc: passt-dev

[-- Attachment #1: Type: text/plain, Size: 6737 bytes --]

On Wed, Apr 02, 2025 at 07:23:29PM +0200, Laurent Vivier wrote:

Needs at least some sort of commit message.

> Signed-off-by: Laurent Vivier <lvivier@redhat.com>

But otherwise,

Reviewed-by: David Gibson <david@gibson.dropbear.id.au>

> ---
>  iov.h    |  1 +
>  packet.c | 15 ++++++++++++---
>  packet.h |  8 +++++---
>  tap.c    | 32 ++++++++++++++++++--------------
>  4 files changed, 36 insertions(+), 20 deletions(-)
> 
> diff --git a/iov.h b/iov.h
> index f9b5fdf0803e..99442d031549 100644
> --- a/iov.h
> +++ b/iov.h
> @@ -17,6 +17,7 @@
>  
>  #include <unistd.h>
>  #include <string.h>
> +#include <stdbool.h>
>  
>  #define IOV_OF_LVALUE(lval) \
>  	(struct iovec){ .iov_base = &(lval), .iov_len = sizeof(lval) }
> diff --git a/packet.c b/packet.c
> index bcac03750dc0..8b11e0850ff6 100644
> --- a/packet.c
> +++ b/packet.c
> @@ -64,15 +64,16 @@ static int packet_check_range(const struct pool *p, const char *ptr, size_t len,
>  /**
>   * packet_add_do() - Add data as packet descriptor to given pool
>   * @p:		Existing pool
> - * @len:	Length of new descriptor
> - * @start:	Start of data
> + * @data:	Data to add
>   * @func:	For tracing: name of calling function, NULL means no trace()
>   * @line:	For tracing: caller line of function call
>   */
> -void packet_add_do(struct pool *p, size_t len, const char *start,
> +void packet_add_do(struct pool *p, struct iov_tail *data,
>  		   const char *func, int line)
>  {
>  	size_t idx = p->count;
> +	const char *start;
> +	size_t len;
>  
>  	if (idx >= p->size) {
>  		trace("add packet index %zu to pool with size %zu, %s:%i",
> @@ -80,6 +81,14 @@ void packet_add_do(struct pool *p, size_t len, const char *start,
>  		return;
>  	}
>  
> +	if (!iov_tail_prune(data))
> +		return;
> +
> +	ASSERT(data->cnt == 1); /* we don't support iovec */
> +
> +	len = data->iov[0].iov_len - data->off;
> +	start = (char *)data->iov[0].iov_base + data->off;
> +
>  	if (packet_check_range(p, start, len, func, line))
>  		return;
>  
> diff --git a/packet.h b/packet.h
> index d099f026631b..f386295da203 100644
> --- a/packet.h
> +++ b/packet.h
> @@ -6,6 +6,8 @@
>  #ifndef PACKET_H
>  #define PACKET_H
>  
> +#include "iov.h"
> +
>  /* Maximum size of a single packet stored in pool, including headers */
>  #define PACKET_MAX_LEN	UINT16_MAX
>  
> @@ -28,15 +30,15 @@ struct pool {
>  };
>  
>  int vu_packet_check_range(void *buf, const char *ptr, size_t len);
> -void packet_add_do(struct pool *p, size_t len, const char *start,
> +void packet_add_do(struct pool *p, struct iov_tail *data,
>  		   const char *func, int line);
>  void *packet_get_do(const struct pool *p, const size_t idx,
>  		    size_t offset, size_t len, size_t *left,
>  		    const char *func, int line);
>  void pool_flush(struct pool *p);
>  
> -#define packet_add(p, len, start)					\
> -	packet_add_do(p, len, start, __func__, __LINE__)
> +#define packet_add(p, data)					\
> +	packet_add_do(p, data, __func__, __LINE__)
>  
>  #define packet_get(p, idx, offset, len, left)				\
>  	packet_get_do(p, idx, offset, len, left, __func__, __LINE__)
> diff --git a/tap.c b/tap.c
> index ab3effe80f89..4b54807c4101 100644
> --- a/tap.c
> +++ b/tap.c
> @@ -683,6 +683,7 @@ resume:
>  		size_t l2len, l3len, hlen, l4len;
>  		const struct ethhdr *eh;
>  		const struct udphdr *uh;
> +		struct iov_tail data;
>  		struct iphdr *iph;
>  		const char *l4h;
>  
> @@ -694,7 +695,8 @@ resume:
>  		if (ntohs(eh->h_proto) == ETH_P_ARP) {
>  			PACKET_POOL_P(pkt, 1, in->buf, in->buf_size);
>  
> -			packet_add(pkt, l2len, (char *)eh);
> +			data = IOV_TAIL_FROM_BUF((void *)eh, l2len, 0);
> +			packet_add(pkt, &data);
>  			arp(c, pkt);
>  			continue;
>  		}
> @@ -739,7 +741,8 @@ resume:
>  
>  			tap_packet_debug(iph, NULL, NULL, 0, NULL, 1);
>  
> -			packet_add(pkt, l4len, l4h);
> +			data = IOV_TAIL_FROM_BUF((void *)l4h, l4len, 0);
> +			packet_add(pkt, &data);
>  			icmp_tap_handler(c, PIF_TAP, AF_INET,
>  					 &iph->saddr, &iph->daddr,
>  					 pkt, now);
> @@ -753,7 +756,8 @@ resume:
>  		if (iph->protocol == IPPROTO_UDP) {
>  			PACKET_POOL_P(pkt, 1, in->buf, in->buf_size);
>  
> -			packet_add(pkt, l2len, (char *)eh);
> +			data = IOV_TAIL_FROM_BUF((void *)eh, l2len, 0);
> +			packet_add(pkt, &data);
>  			if (dhcp(c, pkt))
>  				continue;
>  		}
> @@ -802,7 +806,8 @@ resume:
>  #undef L4_SET
>  
>  append:
> -		packet_add((struct pool *)&seq->p, l4len, l4h);
> +		data = IOV_TAIL_FROM_BUF((void *)l4h, l4len, 0);
> +		packet_add((struct pool *)&seq->p, &data);
>  	}
>  
>  	for (j = 0, seq = tap4_l4; j < seq_count; j++, seq++) {
> @@ -858,6 +863,7 @@ resume:
>  		struct in6_addr *saddr, *daddr;
>  		const struct ethhdr *eh;
>  		const struct udphdr *uh;
> +		struct iov_tail data;
>  		struct ipv6hdr *ip6h;
>  		uint8_t proto;
>  		char *l4h;
> @@ -911,7 +917,8 @@ resume:
>  			if (l4len < sizeof(struct icmp6hdr))
>  				continue;
>  
> -			packet_add(pkt, l4len, l4h);
> +			data = IOV_TAIL_FROM_BUF(l4h, l4len, 0);
> +			packet_add(pkt, &data);
>  
>  			if (ndp(c, (struct icmp6hdr *)l4h, saddr, pkt))
>  				continue;
> @@ -930,7 +937,8 @@ resume:
>  		if (proto == IPPROTO_UDP) {
>  			PACKET_POOL_P(pkt, 1, in->buf, in->buf_size);
>  
> -			packet_add(pkt, l4len, l4h);
> +			data = IOV_TAIL_FROM_BUF(l4h, l4len, 0);
> +			packet_add(pkt, &data);
>  
>  			if (dhcpv6(c, pkt, saddr, daddr))
>  				continue;
> @@ -984,7 +992,8 @@ resume:
>  #undef L4_SET
>  
>  append:
> -		packet_add((struct pool *)&seq->p, l4len, l4h);
> +		data = IOV_TAIL_FROM_BUF(l4h, l4len, 0);
> +		packet_add((struct pool *)&seq->p, &data);
>  	}
>  
>  	for (j = 0, seq = tap6_l4; j < seq_count; j++, seq++) {
> @@ -1058,18 +1067,13 @@ void tap_add_packet(struct ctx *c, struct iov_tail *data)
>  		proto_update_l2_buf(c->guest_mac, NULL);
>  	}
>  
> -	iov_tail_prune(data);
> -	ASSERT(data->cnt == 1); /* packet_add() doesn't support iovec */
> -
>  	switch (ntohs(eh->h_proto)) {
>  	case ETH_P_ARP:
>  	case ETH_P_IP:
> -		packet_add(pool_tap4, data->iov[0].iov_len - data->off,
> -			   (char *)data->iov[0].iov_base + data->off);
> +		packet_add(pool_tap4, data);
>  		break;
>  	case ETH_P_IPV6:
> -		packet_add(pool_tap6, data->iov[0].iov_len - data->off,
> -			   (char *)data->iov[0].iov_base + data->off);
> +		packet_add(pool_tap6, data);
>  		break;
>  	default:
>  		break;

-- 
David Gibson (he or they)	| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you, not the other way
				| around.
http://www.ozlabs.org/~dgibson

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH 05/18] packet: Add packet_base()
  2025-04-02 17:23 ` [PATCH 05/18] packet: Add packet_base() Laurent Vivier
@ 2025-04-03  4:59   ` David Gibson
  0 siblings, 0 replies; 34+ messages in thread
From: David Gibson @ 2025-04-03  4:59 UTC (permalink / raw)
  To: Laurent Vivier; +Cc: passt-dev

[-- Attachment #1: Type: text/plain, Size: 3524 bytes --]

On Wed, Apr 02, 2025 at 07:23:30PM +0200, Laurent Vivier wrote:
> Signed-off-by: Laurent Vivier <lvivier@redhat.com>

Needs commit message.

> ---
>  packet.c | 37 +++++++++++++++++++++++++++++++++++++
>  packet.h |  7 +++++++
>  2 files changed, 44 insertions(+)
> 
> diff --git a/packet.c b/packet.c
> index 8b11e0850ff6..25ede38b94cb 100644
> --- a/packet.c
> +++ b/packet.c
> @@ -156,6 +156,43 @@ void *packet_get_do(const struct pool *p, size_t idx, size_t offset,
>  	return ptr;
>  }
>  
> +/**
> + * packet_base_do() - Get data range from packet descriptor from given pool
> + * @p:		Packet pool
> + * @idx:	Index of packet descriptor in pool
> + * @func:	For tracing: name of calling function, NULL means no trace()
> + * @line:	For tracing: caller line of function call
> + *
> + * Return: pointer to start of data range, NULL on invalid range or descriptor

All these comments need to be updated, they seem to be based on (an
old version of?) packet_get_do and are no longer accurate.

"packet_base()" also doesn't really convey the meaning to me.  Not
sure what a better name is, though.


> + */
> +/* cppcheck-suppress unusedFunction */
> +bool packet_base_do(const struct pool *p, size_t idx,
> +		   struct iov_tail *data,
> +		   const char *func, int line)
> +{
> +	size_t i;
> +
> +	if (idx >= p->size || idx >= p->count) {
> +		if (func) {
> +			trace("packet %zu from pool size: %zu, count: %zu, "
> +			      "%s:%i", idx, p->size, p->count, func, line);
> +		}
> +		return NULL;
> +	}
> +
> +	data->cnt = 1;
> +	data->off = 0;
> +	data->iov = &p->pkt[idx];
> +
> +	for (i = 0; i < data->cnt; i++) {
> +		if (packet_check_range(p, data->iov[i].iov_base,
> +				       data->iov[i].iov_len, func, line))
> +			return false;

This looks to be based logically on versions of packet_get_do() before
I did my rework in 38bcce997 and surrounding commits.  If we get a
packet_check_range() failure here it means the packet pool is already
corrupt and we should just ASSERT.

The conditions on func being non-NULL should also be gone.


> +	}
> +
> +	return true;
> +}
> +
>  /**
>   * pool_flush() - Flush a packet pool
>   * @p:		Pointer to packet pool
> diff --git a/packet.h b/packet.h
> index f386295da203..35058e747a43 100644
> --- a/packet.h
> +++ b/packet.h
> @@ -35,6 +35,9 @@ void packet_add_do(struct pool *p, struct iov_tail *data,
>  void *packet_get_do(const struct pool *p, const size_t idx,
>  		    size_t offset, size_t len, size_t *left,
>  		    const char *func, int line);
> +bool packet_base_do(const struct pool *p, const size_t idx,
> +		    struct iov_tail *data,
> +		    const char *func, int line);
>  void pool_flush(struct pool *p);
>  
>  #define packet_add(p, data)					\
> @@ -43,9 +46,13 @@ void pool_flush(struct pool *p);
>  #define packet_get(p, idx, offset, len, left)				\
>  	packet_get_do(p, idx, offset, len, left, __func__, __LINE__)
>  
> +
>  #define packet_get_try(p, idx, offset, len, left)			\
>  	packet_get_do(p, idx, offset, len, left, NULL, 0)
>  
> +#define packet_base(p, idx, data)					\
> +	packet_base_do(p, idx, data, __func__, __LINE__)
> +
>  #define PACKET_POOL_DECL(_name, _size, _buf)				\
>  struct _name ## _t {							\
>  	char *buf;							\

-- 
David Gibson (he or they)	| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you, not the other way
				| around.
http://www.ozlabs.org/~dgibson

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH 06/18] arp: Convert to iov_tail
  2025-04-02 17:23 ` [PATCH 06/18] arp: Convert to iov_tail Laurent Vivier
@ 2025-04-03  5:00   ` David Gibson
  0 siblings, 0 replies; 34+ messages in thread
From: David Gibson @ 2025-04-03  5:00 UTC (permalink / raw)
  To: Laurent Vivier; +Cc: passt-dev

[-- Attachment #1: Type: text/plain, Size: 1860 bytes --]

On Wed, Apr 02, 2025 at 07:23:31PM +0200, Laurent Vivier wrote:
> Use packet_base() and extract headers using IOV_REMOVE_HEADER()
> rather than packet_get().
> 
> Signed-off-by: Laurent Vivier <lvivier@redhat.com>

Reviewed-by: David Gibson <david@gibson.dropbear.id.au>

> ---
>  arp.c    | 12 +++++++++---
>  packet.c |  1 -
>  2 files changed, 9 insertions(+), 4 deletions(-)
> 
> diff --git a/arp.c b/arp.c
> index 9d68d7c3b602..cd0e15de7de0 100644
> --- a/arp.c
> +++ b/arp.c
> @@ -77,11 +77,17 @@ int arp(const struct ctx *c, const struct pool *p)
>  	const struct ethhdr *eh;
>  	const struct arphdr *ah;
>  	const struct arpmsg *am;
> +	struct iov_tail data;
> +	struct arphdr  ahc;
> +	struct ethhdr ehc;
> +	struct arpmsg amc;
>  
> -	eh = packet_get(p, 0, 0,			 sizeof(*eh), NULL);
> -	ah = packet_get(p, 0, sizeof(*eh),		 sizeof(*ah), NULL);
> -	am = packet_get(p, 0, sizeof(*eh) + sizeof(*ah), sizeof(*am), NULL);
> +	if (!packet_base(p, 0, &data))
> +		return -1;
>  
> +	eh = IOV_REMOVE_HEADER(&data, ehc);
> +	ah = IOV_REMOVE_HEADER(&data, ahc);
> +	am = IOV_REMOVE_HEADER(&data, amc);
>  	if (!eh || !ah || !am)
>  		return -1;
>  
> diff --git a/packet.c b/packet.c
> index 25ede38b94cb..8066ac12502b 100644
> --- a/packet.c
> +++ b/packet.c
> @@ -165,7 +165,6 @@ void *packet_get_do(const struct pool *p, size_t idx, size_t offset,
>   *
>   * Return: pointer to start of data range, NULL on invalid range or descriptor
>   */
> -/* cppcheck-suppress unusedFunction */
>  bool packet_base_do(const struct pool *p, size_t idx,
>  		   struct iov_tail *data,
>  		   const char *func, int line)

-- 
David Gibson (he or they)	| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you, not the other way
				| around.
http://www.ozlabs.org/~dgibson

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH 07/18] ndp: Convert to iov_tail
  2025-04-02 17:23 ` [PATCH 07/18] ndp: " Laurent Vivier
@ 2025-04-03  5:01   ` David Gibson
  0 siblings, 0 replies; 34+ messages in thread
From: David Gibson @ 2025-04-03  5:01 UTC (permalink / raw)
  To: Laurent Vivier; +Cc: passt-dev

[-- Attachment #1: Type: text/plain, Size: 1073 bytes --]

On Wed, Apr 02, 2025 at 07:23:32PM +0200, Laurent Vivier wrote:
> Use packet_base() and extract headers using IOV_REMOVE_HEADER()
> rather than packet_get().
> 
> Signed-off-by: Laurent Vivier <lvivier@redhat.com>

Reviewed-by: David Gibson <david@gibson.dropbear.id.au>

> ---
>  ndp.c | 7 ++++++-
>  1 file changed, 6 insertions(+), 1 deletion(-)
> 
> diff --git a/ndp.c b/ndp.c
> index ded2081ddd1d..64e25d5455b4 100644
> --- a/ndp.c
> +++ b/ndp.c
> @@ -351,8 +351,13 @@ int ndp(const struct ctx *c, const struct icmp6hdr *ih,
>  
>  	if (ih->icmp6_type == NS) {
>  		const struct ndp_ns *ns;
> +		struct iov_tail data;
> +		struct ndp_ns nsc;
>  
> -		ns = packet_get(p, 0, 0, sizeof(struct ndp_ns), NULL);
> +		if (!packet_base(p, 0, &data))
> +			return -1;
> +
> +		ns = IOV_REMOVE_HEADER(&data, nsc);
>  		if (!ns)
>  			return -1;
>  

-- 
David Gibson (he or they)	| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you, not the other way
				| around.
http://www.ozlabs.org/~dgibson

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH 08/18] icmp: Convert to iov_tail
  2025-04-02 17:23 ` [PATCH 08/18] icmp: " Laurent Vivier
@ 2025-04-03  5:04   ` David Gibson
  0 siblings, 0 replies; 34+ messages in thread
From: David Gibson @ 2025-04-03  5:04 UTC (permalink / raw)
  To: Laurent Vivier; +Cc: passt-dev

[-- Attachment #1: Type: text/plain, Size: 2864 bytes --]

On Wed, Apr 02, 2025 at 07:23:33PM +0200, Laurent Vivier wrote:
> Use packet_base() and extract headers using IOV_PEEK_HEADER()
> rather than packet_get().
> 
> Signed-off-by: Laurent Vivier <lvivier@redhat.com>
> ---
>  icmp.c | 38 +++++++++++++++++++++++---------------
>  1 file changed, 23 insertions(+), 15 deletions(-)
> 
> diff --git a/icmp.c b/icmp.c
> index 7e2b3423a8d1..119502c0c340 100644
> --- a/icmp.c
> +++ b/icmp.c
> @@ -241,24 +241,25 @@ int icmp_tap_handler(const struct ctx *c, uint8_t pif, sa_family_t af,
>  	struct icmp_ping_flow *pingf;
>  	const struct flowside *tgt;
>  	union sockaddr_inany sa;
> -	size_t dlen, l4len;
> +	struct iov_tail data;
> +	struct msghdr msh;
>  	uint16_t id, seq;
>  	union flow *flow;
>  	uint8_t proto;
> -	socklen_t sl;
> -	void *pkt;
>  
>  	(void)saddr;
>  	ASSERT(pif == PIF_TAP);
>  
> +	if (!packet_base(p, 0, &data))
> +		return -1;
> +
>  	if (af == AF_INET) {
>  		const struct icmphdr *ih;
> +		struct icmphdr ihc;
>  
> -		if (!(pkt = packet_get(p, 0, 0, sizeof(*ih), &dlen)))
> -			return 1;
> -
> -		ih =  (struct icmphdr *)pkt;
> -		l4len = dlen + sizeof(*ih);
> +		ih = IOV_PEEK_HEADER(&data, ihc);
> +		if (!ih)
> +			return -1;
>  
>  		if (ih->type != ICMP_ECHO)
>  			return 1;
> @@ -268,12 +269,11 @@ int icmp_tap_handler(const struct ctx *c, uint8_t pif, sa_family_t af,
>  		seq = ntohs(ih->un.echo.sequence);
>  	} else if (af == AF_INET6) {
>  		const struct icmp6hdr *ih;
> +		struct icmp6hdr ihc;
>  
> -		if (!(pkt = packet_get(p, 0, 0, sizeof(*ih), &dlen)))
> -			return 1;
> -
> -		ih = (struct icmp6hdr *)pkt;
> -		l4len = dlen + sizeof(*ih);
> +		ih = IOV_PEEK_HEADER(&data, ihc);
> +		if (!ih)
> +			return -1;
>  
>  		if (ih->icmp6_type != ICMPV6_ECHO_REQUEST)
>  			return 1;
> @@ -298,8 +298,16 @@ int icmp_tap_handler(const struct ctx *c, uint8_t pif, sa_family_t af,
>  	ASSERT(flow_proto[pingf->f.type] == proto);
>  	pingf->ts = now->tv_sec;
>  
> -	pif_sockaddr(c, &sa, &sl, PIF_HOST, &tgt->eaddr, 0);
> -	if (sendto(pingf->sock, pkt, l4len, MSG_NOSIGNAL, &sa.sa, sl) < 0) {
> +	ASSERT(data.off == 0);
> +	msh.msg_name = &sa;
> +	msh.msg_iov = (struct iovec *)data.iov;
> +	msh.msg_iovlen = data.cnt;
> +	msh.msg_control = NULL;
> +	msh.msg_controllen = 0;
> +	msh.msg_flags = 0;

I think we're going to want a helper to build a msghdr from an iov_tail.

> +
> +	pif_sockaddr(c, &sa, &msh.msg_namelen, PIF_HOST, &tgt->eaddr, 0);
> +	if (sendmsg(pingf->sock, &msh, MSG_NOSIGNAL) < 0) {
>  		flow_dbg_perror(pingf, "failed to relay request to socket");
>  	} else {
>  		flow_dbg(pingf,

-- 
David Gibson (he or they)	| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you, not the other way
				| around.
http://www.ozlabs.org/~dgibson

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH 09/18] udp: Convert to iov_tail
  2025-04-02 17:23 ` [PATCH 09/18] udp: " Laurent Vivier
@ 2025-04-03  5:11   ` David Gibson
  0 siblings, 0 replies; 34+ messages in thread
From: David Gibson @ 2025-04-03  5:11 UTC (permalink / raw)
  To: Laurent Vivier; +Cc: passt-dev

[-- Attachment #1: Type: text/plain, Size: 3708 bytes --]

On Wed, Apr 02, 2025 at 07:23:34PM +0200, Laurent Vivier wrote:
> Use packet_base() and extract headers using IOV_REMOVE_HEADER()
> and IOV_PEEK_HEADER() rather than packet_get().
> 
> Signed-off-by: Laurent Vivier <lvivier@redhat.com>
> ---
>  iov.c |  1 -
>  udp.c | 37 ++++++++++++++++++++++++++-----------
>  2 files changed, 26 insertions(+), 12 deletions(-)
> 
> diff --git a/iov.c b/iov.c
> index 508fb6da91fb..0c69379316aa 100644
> --- a/iov.c
> +++ b/iov.c
> @@ -173,7 +173,6 @@ size_t iov_size(const struct iovec *iov, size_t iov_cnt)
>   * Returns:      The number of elements successfully copied to the destination
>   *		 iov array.
>   */
> -/* cppcheck-suppress unusedFunction */
>  unsigned iov_copy(struct iovec *dst_iov, size_t dst_iov_cnt,
>  		  const struct iovec *iov, size_t iov_cnt,
>  		  size_t offset, size_t bytes)
> diff --git a/udp.c b/udp.c
> index 80520cbdf188..03906d72e75c 100644
> --- a/udp.c
> +++ b/udp.c
> @@ -858,15 +858,20 @@ int udp_tap_handler(const struct ctx *c, uint8_t pif,
>  	struct iovec m[UIO_MAXIOV];
>  	const struct udphdr *uh;
>  	struct udp_flow *uflow;
> -	int i, s, count = 0;
> +	int i, j, s, count = 0;
> +	struct iov_tail data;
>  	flow_sidx_t tosidx;
>  	in_port_t src, dst;
> +	struct udphdr uhc;
>  	uint8_t topif;
>  	socklen_t sl;
>  
>  	ASSERT(!c->no_udp);
>  
> -	uh = packet_get(p, idx, 0, sizeof(*uh), NULL);
> +	if (!packet_base(p, idx, &data))
> +		return 1;
> +
> +	uh = IOV_PEEK_HEADER(&data, uhc);
>  	if (!uh)
>  		return 1;
>  
> @@ -903,23 +908,33 @@ int udp_tap_handler(const struct ctx *c, uint8_t pif,
>  
>  	pif_sockaddr(c, &to_sa, &sl, topif, &toside->eaddr, toside->eport);
>  
> -	for (i = 0; i < (int)p->count - idx; i++) {
> -		struct udphdr *uh_send;
> -		size_t len;
> +	for (i = 0, j = 0; i < (int)p->count - idx && j < UIO_MAXIOV; i++) {
> +		const struct udphdr *uh_send;
> +
> +		if (!packet_base(p, idx + i, &data))
> +			return p->count - idx;
>  
> -		uh_send = packet_get(p, idx + i, 0, sizeof(*uh), &len);
> +		uh_send = IOV_REMOVE_HEADER(&data, uhc);
>  		if (!uh_send)
>  			return p->count - idx;
>  
> +		iov_tail_prune(&data);

Maybe we should make remove_header always prune, rather than having to
do it manually.

> +
> +		if (data.cnt + j >= UIO_MAXIOV)

Obviously it's equivalent, but this would make more intuitive sense to
me as (j + data.cnt): the amount we've already used in our array, plus
the extra we're going to need.

> +			return p->count - idx;
> +
>  		mm[i].msg_hdr.msg_name = &to_sa;
>  		mm[i].msg_hdr.msg_namelen = sl;
>  
> -		if (len) {
> -			m[i].iov_base = (char *)(uh_send + 1);
> -			m[i].iov_len = len;
> +		if (data.cnt ) {

Stray space.

> +			unsigned int len;
> +
> +			len = iov_copy(&m[j], UIO_MAXIOV - j,
> +				       &data.iov[0], data.cnt, data.off, SIZE_MAX);

So, as I mentioned on iov_copy(), it doesn't obviously report the case
where it copies less than expected not because the source is missing
data, but because the destination doesn't have enough iovecs.  You've
avoided this case with a test above, but that's a bit non-obvious, it
would be nice if we could just call this and check for an error.

> -			mm[i].msg_hdr.msg_iov = m + i;
> -			mm[i].msg_hdr.msg_iovlen = 1;
> +			mm[i].msg_hdr.msg_iov = &m[j];
> +			mm[i].msg_hdr.msg_iovlen = len;
> +			j += len;
>  		} else {
>  			mm[i].msg_hdr.msg_iov = NULL;
>  			mm[i].msg_hdr.msg_iovlen = 0;

-- 
David Gibson (he or they)	| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you, not the other way
				| around.
http://www.ozlabs.org/~dgibson

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH 10/18] tcp: Convert tcp_tap_handler() to use iov_tail
  2025-04-02 17:23 ` [PATCH 10/18] tcp: Convert tcp_tap_handler() to use iov_tail Laurent Vivier
@ 2025-04-03  5:14   ` David Gibson
  0 siblings, 0 replies; 34+ messages in thread
From: David Gibson @ 2025-04-03  5:14 UTC (permalink / raw)
  To: Laurent Vivier; +Cc: passt-dev

[-- Attachment #1: Type: text/plain, Size: 2141 bytes --]

On Wed, Apr 02, 2025 at 07:23:35PM +0200, Laurent Vivier wrote:
> Use packet_base() and extract headers using IOV_REMOVE_HEADER()
> and iov_peek_header_() rather than packet_get().
> 
> Signed-off-by: Laurent Vivier <lvivier@redhat.com>
> ---
>  tcp.c | 17 +++++++++++++----
>  1 file changed, 13 insertions(+), 4 deletions(-)
> 
> diff --git a/tcp.c b/tcp.c
> index a4c840e6721c..790714a08793 100644
> --- a/tcp.c
> +++ b/tcp.c
> @@ -310,6 +310,8 @@
>  #include "tcp_buf.h"
>  #include "tcp_vu.h"
>  

Some explanation of the derivation would be good here.

> +#define OPTLEN_MAX (((1UL << 4) - 6) * 4UL)
> +
>  #ifndef __USE_MISC
>  /* From Linux UAPI, missing in netinet/tcp.h provided by musl */
>  struct tcp_repair_opt {
> @@ -1957,7 +1959,10 @@ int tcp_tap_handler(const struct ctx *c, uint8_t pif, sa_family_t af,
>  {
>  	struct tcp_tap_conn *conn;
>  	const struct tcphdr *th;
> +	char optsc[OPTLEN_MAX];
> +	struct iov_tail data;
>  	size_t optlen, len;
> +	struct tcphdr thc;
>  	const char *opts;
>  	union flow *flow;
>  	flow_sidx_t sidx;
> @@ -1966,15 +1971,19 @@ int tcp_tap_handler(const struct ctx *c, uint8_t pif, sa_family_t af,
>  
>  	(void)pif;
>  
> -	th = packet_get(p, idx, 0, sizeof(*th), &len);
> +	if (!packet_base(p, idx, &data))
> +		return 1;
> +
> +	len = iov_tail_size(&data);

'l4len', please.

> +
> +	th = IOV_REMOVE_HEADER(&data, thc);
>  	if (!th)
>  		return 1;
> -	len += sizeof(*th);
>  
>  	optlen = th->doff * 4UL - sizeof(*th);
>  	/* Static checkers might fail to see this: */
> -	optlen = MIN(optlen, ((1UL << 4) /* from doff width */ - 6) * 4UL);
> -	opts = packet_get(p, idx, sizeof(*th), optlen, NULL);
> +	optlen = MIN(optlen, OPTLEN_MAX);
> +	opts = (char *)iov_peek_header_(&data, &optsc[0], optlen, 1);
>  
>  	sidx = flow_lookup_af(c, IPPROTO_TCP, PIF_TAP, af, saddr, daddr,
>  			      ntohs(th->source), ntohs(th->dest));

-- 
David Gibson (he or they)	| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you, not the other way
				| around.
http://www.ozlabs.org/~dgibson

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH 11/18] tcp: Convert tcp_data_from_tap() to use iov_tail
  2025-04-02 17:23 ` [PATCH 11/18] tcp: Convert tcp_data_from_tap() " Laurent Vivier
@ 2025-04-03  5:20   ` David Gibson
  0 siblings, 0 replies; 34+ messages in thread
From: David Gibson @ 2025-04-03  5:20 UTC (permalink / raw)
  To: Laurent Vivier; +Cc: passt-dev

[-- Attachment #1: Type: text/plain, Size: 2588 bytes --]

On Wed, Apr 02, 2025 at 07:23:36PM +0200, Laurent Vivier wrote:
> Use packet_base() and extract headers using IOV_PEEK_HEADER()
> rather than packet_get().
> 
> Signed-off-by: Laurent Vivier <lvivier@redhat.com>
> ---
>  tcp.c | 24 ++++++++++++++----------
>  1 file changed, 14 insertions(+), 10 deletions(-)
> 
> diff --git a/tcp.c b/tcp.c
> index 790714a08793..a9c04551d9d6 100644
> --- a/tcp.c
> +++ b/tcp.c
> @@ -1643,14 +1643,19 @@ static int tcp_data_from_tap(const struct ctx *c, struct tcp_tap_conn *conn,
>  	for (i = idx, iov_i = 0; i < (int)p->count; i++) {
>  		uint32_t seq, seq_offset, ack_seq;
>  		const struct tcphdr *th;
> -		char *data;
> +		struct iov_tail data;
> +		unsigned int count;
> +		struct tcphdr thc;
>  		size_t off;
>  
> -		th = packet_get(p, i, 0, sizeof(*th), &len);
> +		if (!packet_base(p, i, &data))
> +			return -1;
> +
> +		th = IOV_PEEK_HEADER(&data, thc);
>  		if (!th)
>  			return -1;
> -		len += sizeof(*th);
>  
> +		len = iov_tail_size(&data);
>  		off = th->doff * 4UL;
>  		if (off < sizeof(*th) || off > len)
>  			return -1;
> @@ -1661,9 +1666,7 @@ static int tcp_data_from_tap(const struct ctx *c, struct tcp_tap_conn *conn,
>  		}
>  
>  		len -= off;
> -		data = packet_get(p, i, off, len, NULL);
> -		if (!data)
> -			continue;
> +		data.off = off;

You can use iov_drop() rather than reaching into the internals of
iov_tail here, no?

>  		seq = ntohl(th->seq);
>  		if (SEQ_LT(seq, conn->seq_from_tap) && len <= 1) {
> @@ -1737,10 +1740,11 @@ static int tcp_data_from_tap(const struct ctx *c, struct tcp_tap_conn *conn,
>  			continue;
>  		}
>  
> -		tcp_iov[iov_i].iov_base = data + seq_offset;
> -		tcp_iov[iov_i].iov_len = len - seq_offset;
> -		seq_from_tap += tcp_iov[iov_i].iov_len;
> -		iov_i++;
> +		count = iov_copy(&tcp_iov[iov_i], UIO_MAXIOV - iov_i,
> +				 &data.iov[0], data.cnt, data.off + seq_offset,
> +				 len - seq_offset);

Here again it matters if you run out of space in the destination iov,
and I don't think you have a check above which prevents it.

> +		seq_from_tap += iov_size(&tcp_iov[iov_i], count);

We already called iov_size() on &data above.  We should be able to
derive the total length here from that minus headers, without having
to recount the IOV, no?

> +		iov_i += count;
>  
>  		if (keep == i)
>  			keep = -1;

-- 
David Gibson (he or they)	| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you, not the other way
				| around.
http://www.ozlabs.org/~dgibson

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH 12/18] dhcpv6: Convert to iov_tail
  2025-04-02 17:23 ` [PATCH 12/18] dhcpv6: Convert to iov_tail Laurent Vivier
@ 2025-04-03  5:21   ` David Gibson
  0 siblings, 0 replies; 34+ messages in thread
From: David Gibson @ 2025-04-03  5:21 UTC (permalink / raw)
  To: Laurent Vivier; +Cc: passt-dev

[-- Attachment #1: Type: text/plain, Size: 1715 bytes --]

On Wed, Apr 02, 2025 at 07:23:37PM +0200, Laurent Vivier wrote:
> Use packet_base() and extract headers using IOV_REMOVE_HEADER()
> and IOV_PEEK_HEADER() rather than packet_get().
> 
> Signed-off-by: Laurent Vivier <lvivier@redhat.com>
> ---
>  dhcpv6.c | 11 +++++++++--
>  1 file changed, 9 insertions(+), 2 deletions(-)
> 
> diff --git a/dhcpv6.c b/dhcpv6.c
> index 373a98869f9b..eb3f188af0b5 100644
> --- a/dhcpv6.c
> +++ b/dhcpv6.c
> @@ -496,9 +496,15 @@ int dhcpv6(struct ctx *c, const struct pool *p,
>  	const struct msg_hdr *mh;
>  	const struct udphdr *uh;
>  	struct opt_hdr *bad_ia;
> +	struct iov_tail data;
> +	struct msg_hdr mhc;
> +	struct udphdr uhc;
>  	size_t mlen, n;
>  
> -	uh = packet_get(p, 0, 0, sizeof(*uh), &mlen);
> +	if (!packet_base(p, 0, &data))
> +		return -1;
> +
> +	uh = IOV_REMOVE_HEADER(&data, uhc);
>  	if (!uh)
>  		return -1;
>  
> @@ -511,6 +517,7 @@ int dhcpv6(struct ctx *c, const struct pool *p,
>  	if (!IN6_IS_ADDR_MULTICAST(daddr))
>  		return -1;
>  
> +	mlen = iov_tail_size(&data);

The total length is useful in enough cases that I wonder if it makes
sense for packet_base() to return it.

>  	if (mlen + sizeof(*uh) != ntohs(uh->len) || mlen < sizeof(*mh))
>  		return -1;
>  
> @@ -518,7 +525,7 @@ int dhcpv6(struct ctx *c, const struct pool *p,
>  
>  	src = &c->ip6.our_tap_ll;
>  
> -	mh = packet_get(p, 0, sizeof(*uh), sizeof(*mh), NULL);
> +	mh = IOV_PEEK_HEADER(&data, mhc);
>  	if (!mh)
>  		return -1;
>  

-- 
David Gibson (he or they)	| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you, not the other way
				| around.
http://www.ozlabs.org/~dgibson

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH 13/18] dhcpv6: move offset initialization out of dhcpv6_opt()
  2025-04-02 17:23 ` [PATCH 13/18] dhcpv6: move offset initialization out of dhcpv6_opt() Laurent Vivier
@ 2025-04-03  5:25   ` David Gibson
  0 siblings, 0 replies; 34+ messages in thread
From: David Gibson @ 2025-04-03  5:25 UTC (permalink / raw)
  To: Laurent Vivier; +Cc: passt-dev

[-- Attachment #1: Type: text/plain, Size: 3658 bytes --]

On Wed, Apr 02, 2025 at 07:23:38PM +0200, Laurent Vivier wrote:

Needs a commit message.

> Signed-off-by: Laurent Vivier <lvivier@redhat.com>
> ---
>  dhcpv6.c | 22 +++++++++++-----------
>  1 file changed, 11 insertions(+), 11 deletions(-)
> 
> diff --git a/dhcpv6.c b/dhcpv6.c
> index eb3f188af0b5..ccc64172a480 100644
> --- a/dhcpv6.c
> +++ b/dhcpv6.c
> @@ -54,14 +54,14 @@ struct opt_hdr {
>  	uint16_t l;
>  } __attribute__((packed));
>  
> +#define UDP_MSG_HDR_SIZE	(sizeof(struct udphdr) + sizeof(struct msg_hdr))
>  # define OPT_SIZE_CONV(x)	(htons_constant(x))
>  #define OPT_SIZE(x)		OPT_SIZE_CONV(sizeof(struct opt_##x) -	\
>  					      sizeof(struct opt_hdr))
>  #define OPT_VSIZE(x)		(sizeof(struct opt_##x) - 		\
>  				 sizeof(struct opt_hdr))
>  #define OPT_MAX_SIZE		IPV6_MIN_MTU - (sizeof(struct ipv6hdr) + \
> -						sizeof(struct udphdr) + \
> -						sizeof(struct msg_hdr))
> +						UDP_MSG_HDR_SIZE)
>  
>  /**
>   * struct opt_client_id - DHCPv6 Client Identifier option
> @@ -290,8 +290,7 @@ static struct opt_hdr *dhcpv6_opt(const struct pool *p, size_t *offset,
>  	struct opt_hdr *o;
>  	size_t left;
>  
> -	if (!*offset)
> -		*offset = sizeof(struct udphdr) + sizeof(struct msg_hdr);
> +	ASSERT(*offset >= UDP_MSG_HDR_SIZE);
>  
>  	while ((o = packet_get_try(p, 0, *offset, sizeof(*o), &left))) {
>  		unsigned int opt_len = ntohs(o->l) + sizeof(*o);
> @@ -327,7 +326,7 @@ static struct opt_hdr *dhcpv6_ia_notonlink(const struct pool *p,
>  	size_t offset;
>  
>  	foreach(ia_type, ia_types) {
> -		offset = 0;
> +		offset = UDP_MSG_HDR_SIZE;
>  		while ((ia = dhcpv6_opt(p, &offset, *ia_type))) {
>  			if (ntohs(ia->l) < OPT_VSIZE(ia_na))
>  				return NULL;
> @@ -464,8 +463,9 @@ static size_t dhcpv6_client_fqdn_fill(const struct pool *p, const struct ctx *c,
>  
>  	o = (struct opt_client_fqdn *)(buf + offset);
>  	encode_domain_name(o->domain_name, c->fqdn);
> -	req_opt = (struct opt_client_fqdn *)dhcpv6_opt(p, &(size_t){ 0 },
> -						       OPT_CLIENT_FQDN);
> +	req_opt = (struct opt_client_fqdn *)dhcpv6_opt(p,
> +						&(size_t){ UDP_MSG_HDR_SIZE },
> +						OPT_CLIENT_FQDN);
>  	if (req_opt && req_opt->flags & 0x01 /* S flag */)
>  		o->flags = 0x02 /* O flag */;
>  	else
> @@ -529,15 +529,15 @@ int dhcpv6(struct ctx *c, const struct pool *p,
>  	if (!mh)
>  		return -1;
>  
> -	client_id = dhcpv6_opt(p, &(size_t){ 0 }, OPT_CLIENTID);
> +	client_id = dhcpv6_opt(p, &(size_t){ UDP_MSG_HDR_SIZE }, OPT_CLIENTID);
>  	if (!client_id || ntohs(client_id->l) > OPT_VSIZE(client_id))
>  		return -1;
>  
> -	server_id = dhcpv6_opt(p, &(size_t){ 0 }, OPT_SERVERID);
> +	server_id = dhcpv6_opt(p, &(size_t){ UDP_MSG_HDR_SIZE }, OPT_SERVERID);
>  	if (server_id && ntohs(server_id->l) != OPT_VSIZE(server_id))
>  		return -1;
>  
> -	ia =        dhcpv6_opt(p, &(size_t){ 0 }, OPT_IA_NA);
> +	ia =        dhcpv6_opt(p, &(size_t){ UDP_MSG_HDR_SIZE }, OPT_IA_NA);
>  	if (ia && ntohs(ia->l) < MIN(OPT_VSIZE(ia_na), OPT_VSIZE(ia_ta)))
>  		return -1;
>  
> @@ -587,7 +587,7 @@ int dhcpv6(struct ctx *c, const struct pool *p,
>  		    memcmp(&resp.server_id, server_id, sizeof(resp.server_id)))
>  			return -1;
>  
> -		if (ia || dhcpv6_opt(p, &(size_t){ 0 }, OPT_IA_TA))
> +		if (ia || dhcpv6_opt(p, &(size_t){ UDP_MSG_HDR_SIZE }, OPT_IA_TA))
>  			return -1;
>  
>  		info("DHCPv6: received INFORMATION_REQUEST, sending REPLY");

-- 
David Gibson (he or they)	| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you, not the other way
				| around.
http://www.ozlabs.org/~dgibson

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH 14/18] dhcpv6: Use iov_tail in dhcpv6_opt()
  2025-04-02 17:23 ` [PATCH 14/18] dhcpv6: Use iov_tail in dhcpv6_opt() Laurent Vivier
@ 2025-04-03  5:37   ` David Gibson
  0 siblings, 0 replies; 34+ messages in thread
From: David Gibson @ 2025-04-03  5:37 UTC (permalink / raw)
  To: Laurent Vivier; +Cc: passt-dev

[-- Attachment #1: Type: text/plain, Size: 7102 bytes --]

On Wed, Apr 02, 2025 at 07:23:39PM +0200, Laurent Vivier wrote:

Needs a commit message.

> Signed-off-by: Laurent Vivier <lvivier@redhat.com>
> ---
>  dhcpv6.c | 57 +++++++++++++++++++++++++++-----------------------------
>  1 file changed, 27 insertions(+), 30 deletions(-)
> 
> diff --git a/dhcpv6.c b/dhcpv6.c
> index ccc64172a480..1e83f2c2ad23 100644
> --- a/dhcpv6.c
> +++ b/dhcpv6.c
> @@ -278,30 +278,25 @@ static struct resp_not_on_link_t {
>  
>  /**
>   * dhcpv6_opt() - Get option from DHCPv6 message
> - * @p:		Packet pool, single packet with UDP header
> - * @offset:	Offset to look at, 0: end of header, set to option start
> + * @data:	Data to look at
>   * @type:	Option type to look up, network order
>   *
>   * Return: pointer to option header, or NULL on malformed or missing option
>   */
> -static struct opt_hdr *dhcpv6_opt(const struct pool *p, size_t *offset,
> -				  uint16_t type)
> +static struct opt_hdr *dhcpv6_opt(struct iov_tail *data, uint16_t type)
>  {
> -	struct opt_hdr *o;
> -	size_t left;
> +	struct opt_hdr *o, oc;
>  
> -	ASSERT(*offset >= UDP_MSG_HDR_SIZE);
> -
> -	while ((o = packet_get_try(p, 0, *offset, sizeof(*o), &left))) {
> +	while ((o = IOV_PEEK_HEADER(data, oc))) {
>  		unsigned int opt_len = ntohs(o->l) + sizeof(*o);
>  
> -		if (ntohs(o->l) > left)
> +		if (opt_len > iov_tail_size(data))
>  			return NULL;

>  
>  		if (o->t == type)
>  			return o;

This is no good.  If peek_header() ended up copying the header you'll
now be returning a pointer to a local variable.

Also, you've only verified the contiguity of the option header with
IOV_PEEK_HEADER, the body of the option could still be discontiguous,
which is not what the callers expect.

> -		*offset += opt_len;
> +		data->off += opt_len;

I think you want iov_drop() or REMOVE_HEADER() here rather than
reaching into the iov_tail structure.

>  	}
>  
>  	return NULL;
> @@ -309,31 +304,31 @@ static struct opt_hdr *dhcpv6_opt(const struct pool *p, size_t *offset,
>  
>  /**
>   * dhcpv6_ia_notonlink() - Check if any IA contains non-appropriate addresses
> - * @p:		Packet pool, single packet starting from UDP header
> + * @data:	Data to look at, packet starting from UDP header
>   * @la:		Address we want to lease to the client
>   *
>   * Return: pointer to non-appropriate IA_NA or IA_TA, if any, NULL otherwise
>   */
> -static struct opt_hdr *dhcpv6_ia_notonlink(const struct pool *p,
> +static struct opt_hdr *dhcpv6_ia_notonlink(const struct iov_tail *data,
>  					   struct in6_addr *la)
>  {
>  	int ia_types[2] = { OPT_IA_NA, OPT_IA_TA }, *ia_type;
>  	const struct opt_ia_addr *opt_addr;
>  	char buf[INET6_ADDRSTRLEN];
>  	struct in6_addr req_addr;
> +	struct iov_tail current;
>  	const struct opt_hdr *h;
>  	struct opt_hdr *ia;
> -	size_t offset;
>  
>  	foreach(ia_type, ia_types) {
> -		offset = UDP_MSG_HDR_SIZE;
> -		while ((ia = dhcpv6_opt(p, &offset, *ia_type))) {
> +		current = *data;
> +		while ((ia = dhcpv6_opt(&current, *ia_type))) {
>  			if (ntohs(ia->l) < OPT_VSIZE(ia_na))
>  				return NULL;
>  
> -			offset += sizeof(struct opt_ia_na);
> +			current.off += sizeof(struct opt_ia_na);
>  
> -			while ((h = dhcpv6_opt(p, &offset, OPT_IAAADR))) {
> +			while ((h = dhcpv6_opt(&current, OPT_IAAADR))) {
>  				if (ntohs(h->l) != OPT_VSIZE(ia_addr))
>  					return NULL;
>  
> @@ -342,7 +337,7 @@ static struct opt_hdr *dhcpv6_ia_notonlink(const struct pool *p,
>  				if (!IN6_ARE_ADDR_EQUAL(la, &req_addr))
>  					goto err;
>  
> -				offset += sizeof(struct opt_ia_addr);
> +				current.off += sizeof(struct opt_ia_addr);
>  			}
>  		}
>  	}
> @@ -434,13 +429,15 @@ search:
>  
>  /**
>   * dhcpv6_client_fqdn_fill() - Fill in client FQDN option
> + * @data:	Data to look at
>   * @c:		Execution context
>   * @buf:	Response message buffer where options will be appended
>   * @offset:	Offset in message buffer for new options
>   *
>   * Return: updated length of response message buffer.
>   */
> -static size_t dhcpv6_client_fqdn_fill(const struct pool *p, const struct ctx *c,
> +static size_t dhcpv6_client_fqdn_fill(struct iov_tail *data,
> +				      const struct ctx *c,
>  				      char *buf, int offset)
>  
>  {
> @@ -463,9 +460,8 @@ static size_t dhcpv6_client_fqdn_fill(const struct pool *p, const struct ctx *c,
>  
>  	o = (struct opt_client_fqdn *)(buf + offset);
>  	encode_domain_name(o->domain_name, c->fqdn);
> -	req_opt = (struct opt_client_fqdn *)dhcpv6_opt(p,
> -						&(size_t){ UDP_MSG_HDR_SIZE },
> -						OPT_CLIENT_FQDN);
> +	data->off += UDP_MSG_HDR_SIZE;
> +	req_opt = (struct opt_client_fqdn *)dhcpv6_opt(data, OPT_CLIENT_FQDN);
>  	if (req_opt && req_opt->flags & 0x01 /* S flag */)
>  		o->flags = 0x02 /* O flag */;
>  	else
> @@ -525,19 +521,19 @@ int dhcpv6(struct ctx *c, const struct pool *p,
>  
>  	src = &c->ip6.our_tap_ll;
>  
> -	mh = IOV_PEEK_HEADER(&data, mhc);
> +	mh = IOV_REMOVE_HEADER(&data, mhc);
>  	if (!mh)
>  		return -1;
>  
> -	client_id = dhcpv6_opt(p, &(size_t){ UDP_MSG_HDR_SIZE }, OPT_CLIENTID);
> +	client_id = dhcpv6_opt(&data, OPT_CLIENTID);
>  	if (!client_id || ntohs(client_id->l) > OPT_VSIZE(client_id))
>  		return -1;
>  
> -	server_id = dhcpv6_opt(p, &(size_t){ UDP_MSG_HDR_SIZE }, OPT_SERVERID);
> +	server_id = dhcpv6_opt(&data, OPT_SERVERID);
>  	if (server_id && ntohs(server_id->l) != OPT_VSIZE(server_id))
>  		return -1;
>  
> -	ia =        dhcpv6_opt(p, &(size_t){ UDP_MSG_HDR_SIZE }, OPT_IA_NA);
> +	ia =        dhcpv6_opt(&data, OPT_IA_NA);
>  	if (ia && ntohs(ia->l) < MIN(OPT_VSIZE(ia_na), OPT_VSIZE(ia_ta)))
>  		return -1;
>  
> @@ -553,7 +549,7 @@ int dhcpv6(struct ctx *c, const struct pool *p,
>  		if (mh->type == TYPE_CONFIRM && server_id)
>  			return -1;
>  
> -		if ((bad_ia = dhcpv6_ia_notonlink(p, &c->ip6.addr))) {
> +		if ((bad_ia = dhcpv6_ia_notonlink(&data, &c->ip6.addr))) {
>  			info("DHCPv6: received CONFIRM with inappropriate IA,"
>  			     " sending NotOnLink status in REPLY");
>  
> @@ -587,7 +583,7 @@ int dhcpv6(struct ctx *c, const struct pool *p,
>  		    memcmp(&resp.server_id, server_id, sizeof(resp.server_id)))
>  			return -1;
>  
> -		if (ia || dhcpv6_opt(p, &(size_t){ UDP_MSG_HDR_SIZE }, OPT_IA_TA))
> +		if (ia || dhcpv6_opt(&data, OPT_IA_TA))
>  			return -1;
>  
>  		info("DHCPv6: received INFORMATION_REQUEST, sending REPLY");
> @@ -619,7 +615,8 @@ int dhcpv6(struct ctx *c, const struct pool *p,
>  	n = offsetof(struct resp_t, client_id) +
>  	    sizeof(struct opt_hdr) + ntohs(client_id->l);
>  	n = dhcpv6_dns_fill(c, (char *)&resp, n);
> -	n = dhcpv6_client_fqdn_fill(p, c, (char *)&resp, n);
> +	packet_base(p, 0, &data);
> +	n = dhcpv6_client_fqdn_fill(&data, c, (char *)&resp, n);
>  
>  	resp.hdr.xid = mh->xid;
>  

-- 
David Gibson (he or they)	| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you, not the other way
				| around.
http://www.ozlabs.org/~dgibson

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH 15/18] dhcp: Convert to iov_tail
  2025-04-02 17:23 ` [PATCH 15/18] dhcp: Convert to iov_tail Laurent Vivier
@ 2025-04-03  5:47   ` David Gibson
  0 siblings, 0 replies; 34+ messages in thread
From: David Gibson @ 2025-04-03  5:47 UTC (permalink / raw)
  To: Laurent Vivier; +Cc: passt-dev

[-- Attachment #1: Type: text/plain, Size: 3912 bytes --]

On Wed, Apr 02, 2025 at 07:23:40PM +0200, Laurent Vivier wrote:
> Use packet_base() and extract headers using IOV_REMOVE_HEADER()
> and IOV_PEEK_HEADER() rather than packet_get().
> 
> Signed-off-by: Laurent Vivier <lvivier@redhat.com>
> ---
>  dhcp.c | 38 ++++++++++++++++++++++----------------
>  iov.c  |  1 -
>  2 files changed, 22 insertions(+), 17 deletions(-)
> 
> diff --git a/dhcp.c b/dhcp.c
> index b0de04be6f27..f3167beb18b7 100644
> --- a/dhcp.c
> +++ b/dhcp.c
> @@ -302,27 +302,32 @@ static void opt_set_dns_search(const struct ctx *c, size_t max_len)
>   */
>  int dhcp(const struct ctx *c, const struct pool *p)
>  {
> -	size_t mlen, dlen, offset = 0, opt_len, opt_off = 0;
> +	size_t mlen, dlen, opt_len, opt_off = 0;
>  	char macstr[ETH_ADDRSTRLEN];
>  	struct in_addr mask, dst;
>  	const struct ethhdr *eh;
>  	const struct iphdr *iph;
>  	const struct udphdr *uh;
> +	struct iov_tail data;
>  	struct msg const *m;
> +	struct msg mc;
> +	struct ethhdr ehc;
> +	struct iphdr iphc;
> +	struct udphdr uhc;
>  	struct msg reply;
>  	unsigned int i;
>  
> -	eh  = packet_get(p, 0, offset, sizeof(*eh),  NULL);
> -	offset += sizeof(*eh);
> +	if (!packet_base(p, 0, &data))
> +		return -1;
>  
> -	iph = packet_get(p, 0, offset, sizeof(*iph), NULL);
> +	eh = IOV_REMOVE_HEADER(&data, ehc);
> +	iph = IOV_PEEK_HEADER(&data, iphc);
>  	if (!eh || !iph)
>  		return -1;
>  
> -	offset += iph->ihl * 4UL;
> -	uh  = packet_get(p, 0, offset, sizeof(*uh),  &mlen);
> -	offset += sizeof(*uh);
> +	data.off += iph->ihl * 4UL;

iov_drop().

> +	uh = IOV_REMOVE_HEADER(&data, uhc);
>  	if (!uh)
>  		return -1;
>  
> @@ -332,7 +337,9 @@ int dhcp(const struct ctx *c, const struct pool *p)
>  	if (c->no_dhcp)
>  		return 1;
>  
> -	m   = packet_get(p, 0, offset, offsetof(struct msg, o), &opt_len);
> +	mlen = iov_tail_size(&data);
> +	m = (struct msg const *)iov_remove_header_(&data, &mc, offsetof(struct msg, o),
> +						   __alignof__(struct msg));
>  	if (!m						||
>  	    mlen  != ntohs(uh->len) - sizeof(*uh)	||
>  	    mlen  <  offsetof(struct msg, o)		||
> @@ -355,25 +362,24 @@ int dhcp(const struct ctx *c, const struct pool *p)
>  	memset(&reply.file,	0,		sizeof(reply.file));
>  	reply.magic		= m->magic;
>  
> -	offset += offsetof(struct msg, o);
> -
>  	for (i = 0; i < ARRAY_SIZE(opts); i++)
>  		opts[i].clen = -1;
>  
> +	opt_len = iov_tail_size(&data);
>  	while (opt_off + 2 < opt_len) {
> -		const uint8_t *olen, *val;
> +		const uint8_t *olen;
>  		uint8_t *type;
>  
> -		type = packet_get(p, 0, offset + opt_off,	1,	NULL);
> -		olen = packet_get(p, 0, offset + opt_off + 1,	1,	NULL);
> +		type = iov_tail_ptr(&data, opt_off, 1);
> +		olen = iov_tail_ptr(&data, opt_off + 1, 1);

I think you can and should use remove_header() for this as well.

>  		if (!type || !olen)
>  			return -1;
>  
> -		val =  packet_get(p, 0, offset + opt_off + 2,	*olen,	NULL);
> -		if (!val)
> +		if (iov_tail_size(&data) < opt_off + 2 + *olen)
>  			return -1;
>  
> -		memcpy(&opts[*type].c, val, *olen);
> +		iov_to_buf(&data.iov[0], data.cnt, data.off + opt_off + 2,
> +			   &opts[*type].c, *olen);
>  		opts[*type].clen = *olen;
>  		opt_off += *olen + 2;
>  	}
> diff --git a/iov.c b/iov.c
> index 0c69379316aa..66f15585a61a 100644
> --- a/iov.c
> +++ b/iov.c
> @@ -253,7 +253,6 @@ bool iov_drop(struct iov_tail *tail, size_t len)
>   * Returns:	pointer to the data in the tail, NULL if the
>   *		tail doesn't contains @len bytes.
>   */
> -/* cppcheck-suppress unusedFunction */
>  void *iov_tail_ptr(struct iov_tail *tail, size_t off, size_t len)
>  {
>  	const struct iovec *iov;

-- 
David Gibson (he or they)	| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you, not the other way
				| around.
http://www.ozlabs.org/~dgibson

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply	[flat|nested] 34+ messages in thread

end of thread, other threads:[~2025-04-03  9:16 UTC | newest]

Thread overview: 34+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2025-04-02 17:23 [PATCH 00/18] Introduce discontiguous frames management Laurent Vivier
2025-04-02 17:23 ` [PATCH 01/18] arp: Don't mix incoming and outgoing buffers Laurent Vivier
2025-04-03  1:57   ` David Gibson
2025-04-02 17:23 ` [PATCH 02/18] iov: Update IOV_REMOVE_HEADER() and IOV_PEEK_HEADER() Laurent Vivier
2025-04-03  2:36   ` David Gibson
2025-04-02 17:23 ` [PATCH 03/18] tap: Use iov_tail with tap_add_packet() Laurent Vivier
2025-04-03  4:50   ` David Gibson
2025-04-02 17:23 ` [PATCH 04/18] packet: Use iov_tail with packet_add() Laurent Vivier
2025-04-03  4:54   ` David Gibson
2025-04-02 17:23 ` [PATCH 05/18] packet: Add packet_base() Laurent Vivier
2025-04-03  4:59   ` David Gibson
2025-04-02 17:23 ` [PATCH 06/18] arp: Convert to iov_tail Laurent Vivier
2025-04-03  5:00   ` David Gibson
2025-04-02 17:23 ` [PATCH 07/18] ndp: " Laurent Vivier
2025-04-03  5:01   ` David Gibson
2025-04-02 17:23 ` [PATCH 08/18] icmp: " Laurent Vivier
2025-04-03  5:04   ` David Gibson
2025-04-02 17:23 ` [PATCH 09/18] udp: " Laurent Vivier
2025-04-03  5:11   ` David Gibson
2025-04-02 17:23 ` [PATCH 10/18] tcp: Convert tcp_tap_handler() to use iov_tail Laurent Vivier
2025-04-03  5:14   ` David Gibson
2025-04-02 17:23 ` [PATCH 11/18] tcp: Convert tcp_data_from_tap() " Laurent Vivier
2025-04-03  5:20   ` David Gibson
2025-04-02 17:23 ` [PATCH 12/18] dhcpv6: Convert to iov_tail Laurent Vivier
2025-04-03  5:21   ` David Gibson
2025-04-02 17:23 ` [PATCH 13/18] dhcpv6: move offset initialization out of dhcpv6_opt() Laurent Vivier
2025-04-03  5:25   ` David Gibson
2025-04-02 17:23 ` [PATCH 14/18] dhcpv6: Use iov_tail in dhcpv6_opt() Laurent Vivier
2025-04-03  5:37   ` David Gibson
2025-04-02 17:23 ` [PATCH 15/18] dhcp: Convert to iov_tail Laurent Vivier
2025-04-03  5:47   ` David Gibson
2025-04-02 17:23 ` [PATCH 16/18] tap: " Laurent Vivier
2025-04-02 17:23 ` [PATCH 17/18] ip: Use iov_tail in ipv6_l4hdr() Laurent Vivier
2025-04-02 17:23 ` [PATCH 18/18] tap: Convert to iov_tail Laurent Vivier

Code repositories for project(s) associated with this public inbox

	https://passt.top/passt

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for IMAP folder(s).