public inbox for passt-dev@passt.top
 help / color / mirror / code / Atom feed
From: David Gibson <david@gibson.dropbear.id.au>
To: passt-dev@passt.top, Stefano Brivio <sbrivio@redhat.com>
Cc: jlesev@gmail.com, David Gibson <david@gibson.dropbear.id.au>
Subject: [PATCH 1/8] tcp, tap: Correctly advance through packets in tcp_tap_handler()
Date: Fri,  8 Sep 2023 11:49:46 +1000	[thread overview]
Message-ID: <20230908014953.822952-2-david@gibson.dropbear.id.au> (raw)
In-Reply-To: <20230908014953.822952-1-david@gibson.dropbear.id.au>

In both tap4_handler() and tap6_handler(), once we've sorted incoming l3
packets into "sequences", we then step through all the packets in each TCP
sequence calling tcp_tap_handler().  Or so it appears.

In fact, tcp_tap_handler() doesn't take an index and always looks at packet
0 of the sequence, except when it calls tcp_data_from_tap() to process
data packets.  It appears to be written with the idea that the struct pool
is a queue, from which it consumes packets as it processes them, but that's
not how the pool data structure works - a pool is more like an array of
packets.

We only get away with this because setup packets for TCP tend to come in
separate batches (because we need to reply in between), so we only get a
bunch of packets for the same connection together when they're data
packets (tcp_data_from_tap() has its own loop through packets).

Correct this by adding an index parameter to tcp_tap_handler() and altering
the loops in tap.c to step through the pool properly.

Link: https://bugs.passt.top/show_bug.cgi?id=68

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
---
 tap.c | 25 +++++++++++++++++--------
 tcp.c | 28 +++++++++++++++-------------
 tcp.h |  2 +-
 3 files changed, 33 insertions(+), 22 deletions(-)

diff --git a/tap.c b/tap.c
index 8d7859c..445a5ca 100644
--- a/tap.c
+++ b/tap.c
@@ -707,16 +707,20 @@ append:
 
 	for (j = 0, seq = tap4_l4; j < seq_count; j++, seq++) {
 		struct pool *p = (struct pool *)&seq->p;
-		size_t n = p->count;
 
-		tap_packet_debug(NULL, NULL, seq, 0, NULL, n);
+		tap_packet_debug(NULL, NULL, seq, 0, NULL, p->count);
 
 		if (seq->protocol == IPPROTO_TCP) {
+			size_t k;
+
 			if (c->no_tcp)
 				continue;
-			while ((n -= tcp_tap_handler(c, AF_INET, &seq->saddr,
-						     &seq->daddr, p, now)));
+			for (k = 0; k < p->count; )
+				k += tcp_tap_handler(c, AF_INET, &seq->saddr,
+						     &seq->daddr, p, k, now);
 		} else if (seq->protocol == IPPROTO_UDP) {
+			size_t n = p->count;
+
 			if (c->no_udp)
 				continue;
 			while ((n -= udp_tap_handler(c, AF_INET, &seq->saddr,
@@ -868,16 +872,21 @@ append:
 
 	for (j = 0, seq = tap6_l4; j < seq_count; j++, seq++) {
 		struct pool *p = (struct pool *)&seq->p;
-		size_t n = p->count;
 
-		tap_packet_debug(NULL, NULL, NULL, seq->protocol, seq, n);
+		tap_packet_debug(NULL, NULL, NULL, seq->protocol, seq,
+				 p->count);
 
 		if (seq->protocol == IPPROTO_TCP) {
+			size_t k;
+
 			if (c->no_tcp)
 				continue;
-			while ((n -= tcp_tap_handler(c, AF_INET6, &seq->saddr,
-						     &seq->daddr, p, now)));
+			for (k = 0; k < p->count; )
+				k += tcp_tap_handler(c, AF_INET6, &seq->saddr,
+						     &seq->daddr, p, k, now);
 		} else if (seq->protocol == IPPROTO_UDP) {
+			size_t n = p->count;
+
 			if (c->no_udp)
 				continue;
 			while ((n -= udp_tap_handler(c, AF_INET6, &seq->saddr,
diff --git a/tcp.c b/tcp.c
index c89e6e4..d8c2327 100644
--- a/tcp.c
+++ b/tcp.c
@@ -2294,11 +2294,12 @@ err:
  * @c:		Execution context
  * @conn:	Connection pointer
  * @p:		Pool of TCP packets, with TCP headers
+ * @idx:	Index of first data packet in pool
  *
  * #syscalls sendmsg
  */
 static void tcp_data_from_tap(struct ctx *c, struct tcp_tap_conn *conn,
-			      const struct pool *p)
+			      const struct pool *p, int idx)
 {
 	int i, iov_i, ack = 0, fin = 0, retr = 0, keep = -1, partial_send = 0;
 	uint16_t max_ack_seq_wnd = conn->wnd_from_tap;
@@ -2313,7 +2314,7 @@ static void tcp_data_from_tap(struct ctx *c, struct tcp_tap_conn *conn,
 
 	ASSERT(conn->events & ESTABLISHED);
 
-	for (i = 0, iov_i = 0; i < (int)p->count; i++) {
+	for (i = idx, iov_i = 0; i < (int)p->count; i++) {
 		uint32_t seq, seq_offset, ack_seq;
 		struct tcphdr *th;
 		char *data;
@@ -2530,12 +2531,13 @@ static void tcp_conn_from_sock_finish(struct ctx *c, struct tcp_tap_conn *conn,
  * @saddr:	Source address
  * @daddr:	Destination address
  * @p:		Pool of TCP packets, with TCP headers
+ * @idx:	Index of first packet in pool to process
  * @now:	Current timestamp
  *
  * Return: count of consumed packets
  */
 int tcp_tap_handler(struct ctx *c, int af, const void *saddr, const void *daddr,
-		    const struct pool *p, const struct timespec *now)
+		    const struct pool *p, int idx, const struct timespec *now)
 {
 	struct tcp_tap_conn *conn;
 	size_t optlen, len;
@@ -2543,17 +2545,17 @@ int tcp_tap_handler(struct ctx *c, int af, const void *saddr, const void *daddr,
 	int ack_due = 0;
 	char *opts;
 
-	if (!packet_get(p, 0, 0, 0, &len))
+	if (!packet_get(p, idx, 0, 0, &len))
 		return 1;
 
-	th = packet_get(p, 0, 0, sizeof(*th), NULL);
+	th = packet_get(p, idx, 0, sizeof(*th), NULL);
 	if (!th)
 		return 1;
 
 	optlen = th->doff * 4UL - sizeof(*th);
 	/* Static checkers might fail to see this: */
 	optlen = MIN(optlen, ((1UL << 4) /* from doff width */ - 6) * 4UL);
-	opts = packet_get(p, 0, sizeof(*th), optlen, NULL);
+	opts = packet_get(p, idx, sizeof(*th), optlen, NULL);
 
 	conn = tcp_hash_lookup(c, af, daddr, htons(th->source), htons(th->dest));
 
@@ -2569,7 +2571,7 @@ int tcp_tap_handler(struct ctx *c, int af, const void *saddr, const void *daddr,
 
 	if (th->rst) {
 		conn_event(c, conn, CLOSED);
-		return p->count;
+		return p->count - idx;
 	}
 
 	if (th->ack && !(conn->events & ESTABLISHED))
@@ -2591,7 +2593,7 @@ int tcp_tap_handler(struct ctx *c, int af, const void *saddr, const void *daddr,
 	if (conn->events & TAP_SYN_RCVD) {
 		if (!(conn->events & TAP_SYN_ACK_SENT)) {
 			tcp_rst(c, conn);
-			return p->count;
+			return p->count - idx;
 		}
 
 		conn_event(c, conn, ESTABLISHED);
@@ -2603,19 +2605,19 @@ int tcp_tap_handler(struct ctx *c, int af, const void *saddr, const void *daddr,
 			tcp_send_flag(c, conn, ACK);
 			conn_event(c, conn, SOCK_FIN_SENT);
 
-			return p->count;
+			return p->count - idx;
 		}
 
 		if (!th->ack) {
 			tcp_rst(c, conn);
-			return p->count;
+			return p->count - idx;
 		}
 
 		tcp_clamp_window(c, conn, ntohs(th->window));
 
 		tcp_data_from_sock(c, conn);
 
-		if (p->count == 1)
+		if (p->count - idx == 1)
 			return 1;
 	}
 
@@ -2631,7 +2633,7 @@ int tcp_tap_handler(struct ctx *c, int af, const void *saddr, const void *daddr,
 	}
 
 	/* Established connections accepting data from tap */
-	tcp_data_from_tap(c, conn, p);
+	tcp_data_from_tap(c, conn, p, idx);
 	if (conn->seq_ack_to_tap != conn->seq_from_tap)
 		ack_due = 1;
 
@@ -2645,7 +2647,7 @@ int tcp_tap_handler(struct ctx *c, int af, const void *saddr, const void *daddr,
 	if (ack_due)
 		conn_flag(c, conn, ACK_TO_TAP_DUE);
 
-	return p->count;
+	return p->count - idx;
 }
 
 /**
diff --git a/tcp.h b/tcp.h
index 9eaec3f..6444d6a 100644
--- a/tcp.h
+++ b/tcp.h
@@ -18,7 +18,7 @@ void tcp_listen_handler(struct ctx *c, union epoll_ref ref,
 			const struct timespec *now);
 void tcp_sock_handler(struct ctx *c, union epoll_ref ref, uint32_t events);
 int tcp_tap_handler(struct ctx *c, int af, const void *saddr, const void *daddr,
-		    const struct pool *p, const struct timespec *now);
+		    const struct pool *p, int idx, const struct timespec *now);
 int tcp_sock_init(const struct ctx *c, sa_family_t af, const void *addr,
 		  const char *ifname, in_port_t port);
 int tcp_init(struct ctx *c);
-- 
@@ -18,7 +18,7 @@ void tcp_listen_handler(struct ctx *c, union epoll_ref ref,
 			const struct timespec *now);
 void tcp_sock_handler(struct ctx *c, union epoll_ref ref, uint32_t events);
 int tcp_tap_handler(struct ctx *c, int af, const void *saddr, const void *daddr,
-		    const struct pool *p, const struct timespec *now);
+		    const struct pool *p, int idx, const struct timespec *now);
 int tcp_sock_init(const struct ctx *c, sa_family_t af, const void *addr,
 		  const char *ifname, in_port_t port);
 int tcp_init(struct ctx *c);
-- 
2.41.0


  reply	other threads:[~2023-09-08  1:50 UTC|newest]

Thread overview: 10+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-09-08  1:49 [PATCH 0/8] Fix a number of bugs with handling of TCP packets from tap David Gibson
2023-09-08  1:49 ` David Gibson [this message]
2023-09-08  1:49 ` [PATCH 2/8] udp, tap: Correctly advance through packets in udp_tap_handler() David Gibson
2023-09-08  1:49 ` [PATCH 3/8] tcp: Remove some redundant packet_get() operations David Gibson
2023-09-08  1:49 ` [PATCH 4/8] tcp: Never hash match closed connections David Gibson
2023-09-08  1:49 ` [PATCH 5/8] tcp: Return consumed packet count from tcp_data_from_tap() David Gibson
2023-09-08  1:49 ` [PATCH 6/8] tcp: Correctly handle RST followed rapidly by SYN David Gibson
2023-09-08  1:49 ` [PATCH 7/8] tcp: Consolidate paths where we initiate reset on tap interface David Gibson
2023-09-08  1:49 ` [PATCH 8/8] tcp: Correct handling of FIN,ACK followed by SYN David Gibson
2023-09-08 15:27 ` [PATCH 0/8] Fix a number of bugs with handling of TCP packets from tap Stefano Brivio

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20230908014953.822952-2-david@gibson.dropbear.id.au \
    --to=david@gibson.dropbear.id.au \
    --cc=jlesev@gmail.com \
    --cc=passt-dev@passt.top \
    --cc=sbrivio@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link.

Be sure your reply has a Subject: header at the top and a blank line
before the message body.

Code repositories for project(s) associated with this public inbox

	https://passt.top/passt

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for IMAP folder(s).