public inbox for passt-dev@passt.top
 help / color / mirror / code / Atom feed
* [PATCH] tcp: move seq_to_tap update to when frame is queued
@ 2024-05-09  3:00 Jon Maloy
  2024-05-10 16:40 ` Stefano Brivio
  0 siblings, 1 reply; 5+ messages in thread
From: Jon Maloy @ 2024-05-09  3:00 UTC (permalink / raw)
  To: passt-dev, sbrivio, lvivier, dgibson, jmaloy

commit a469fc393fa1 ("tcp, tap: Don't increase tap-side sequence counter for dropped frames")
delayed update of conn->seq_to_tap until the moment the corresponding
frame has been successfully pushed out. This has the advantage that we
can immediately retransmit a buffer that we fail to transmit, rather
than waiting for the peer side to discover the loss and initiate fast
retransmit.

This approach has turned out to cause a problem with spurious sequence
number updates during peer-initiated retransmits, and we have realized
it may not be the best way to solve the above issue.

We now restore the previous method, by updating the said field at the
moment a frame is added to the outqueue. To retain the advantage of fast
retransmit based on local failure detection, we now scan through the part
of the outqueue that had to be dropped, and restore the sequence counter
for each affected connection to the most appropriate value.

Signed-off-by: Jon Maloy <jmaloy@redhat.com>
---
 tcp.c | 52 ++++++++++++++++++++++++++++++++++++++++++----------
 1 file changed, 42 insertions(+), 10 deletions(-)

diff --git a/tcp.c b/tcp.c
index 21d0af0..58fdbc9 100644
--- a/tcp.c
+++ b/tcp.c
@@ -412,11 +412,13 @@ static union inany_addr low_rtt_dst[LOW_RTT_TABLE_SIZE];
 
 /**
  * tcp_buf_seq_update - Sequences to update with length of frames once sent
- * @seq:	Pointer to sequence number sent to tap-side, to be updated
+ * @conn:       Pointer to connection corresponding to frame. May need update
+ * @seq:	Sequence number of the corresponding frame
  * @len:	TCP payload length
  */
 struct tcp_buf_seq_update {
-	uint32_t *seq;
+	struct tcp_tap_conn *conn;
+	uint32_t seq;
 	uint16_t len;
 };
 
@@ -1261,25 +1263,52 @@ static void tcp_flags_flush(const struct ctx *c)
 	tcp4_flags_used = 0;
 }
 
+/**
+ * tcp_revert_seq() - Revert affected conn->seq_to_tap after failed transmission
+ * @seq_update: Array with connection and sequence number data
+ * @s:          Entry corresponding to first dropped frame
+ * @e:          Entry corresponding to last dropped frame
+ */
+static void tcp_revert_seq(struct tcp_buf_seq_update *seq_update, int s, int e)
+{
+	struct tcp_tap_conn *conn;
+	uint32_t lowest_seq;
+	int i, ii;
+
+	for (i = s; i < e; i++) {
+		conn = seq_update[i].conn;
+		lowest_seq = seq_update[i].seq;
+
+		for (ii = i + 1; ii < e; ii++) {
+			if (seq_update[ii].conn != conn)
+				continue;
+			if (SEQ_GT(lowest_seq, seq_update[ii].seq))
+				lowest_seq = seq_update[ii].seq;
+		}
+
+		if (SEQ_GT(conn->seq_to_tap, lowest_seq))
+			conn->seq_to_tap = lowest_seq;
+	}
+}
+
 /**
  * tcp_payload_flush() - Send out buffers for segments with data
  * @c:		Execution context
  */
 static void tcp_payload_flush(const struct ctx *c)
 {
-	unsigned i;
 	size_t m;
 
 	m = tap_send_frames(c, &tcp6_l2_iov[0][0], TCP_NUM_IOVS,
 			    tcp6_payload_used);
-	for (i = 0; i < m; i++)
-		*tcp6_seq_update[i].seq += tcp6_seq_update[i].len;
+	if (m != tcp6_payload_used)
+		tcp_revert_seq(tcp6_seq_update, m, tcp6_payload_used);
 	tcp6_payload_used = 0;
 
 	m = tap_send_frames(c, &tcp4_l2_iov[0][0], TCP_NUM_IOVS,
 			    tcp4_payload_used);
-	for (i = 0; i < m; i++)
-		*tcp4_seq_update[i].seq += tcp4_seq_update[i].len;
+	if (m != tcp4_payload_used)
+		tcp_revert_seq(tcp4_seq_update, m, tcp4_payload_used);
 	tcp4_payload_used = 0;
 }
 
@@ -2129,10 +2158,11 @@ static int tcp_sock_consume(const struct tcp_tap_conn *conn, uint32_t ack_seq)
 static void tcp_data_to_tap(const struct ctx *c, struct tcp_tap_conn *conn,
 			    ssize_t dlen, int no_csum, uint32_t seq)
 {
-	uint32_t *seq_update = &conn->seq_to_tap;
 	struct iovec *iov;
 	size_t l4len;
 
+	conn->seq_to_tap = seq;
+
 	if (CONN_V4(conn)) {
 		struct iovec *iov_prev = tcp4_l2_iov[tcp4_payload_used - 1];
 		const uint16_t *check = NULL;
@@ -2142,7 +2172,8 @@ static void tcp_data_to_tap(const struct ctx *c, struct tcp_tap_conn *conn,
 			check = &iph->check;
 		}
 
-		tcp4_seq_update[tcp4_payload_used].seq = seq_update;
+		tcp4_seq_update[tcp4_payload_used].conn = conn;
+		tcp4_seq_update[tcp4_payload_used].seq = seq;
 		tcp4_seq_update[tcp4_payload_used].len = dlen;
 
 		iov = tcp4_l2_iov[tcp4_payload_used++];
@@ -2151,7 +2182,8 @@ static void tcp_data_to_tap(const struct ctx *c, struct tcp_tap_conn *conn,
 		if (tcp4_payload_used > TCP_FRAMES_MEM - 1)
 			tcp_payload_flush(c);
 	} else if (CONN_V6(conn)) {
-		tcp6_seq_update[tcp6_payload_used].seq = seq_update;
+		tcp6_seq_update[tcp6_payload_used].conn = conn;
+		tcp6_seq_update[tcp6_payload_used].seq = seq;
 		tcp6_seq_update[tcp6_payload_used].len = dlen;
 
 		iov = tcp6_l2_iov[tcp6_payload_used++];
-- 
@@ -412,11 +412,13 @@ static union inany_addr low_rtt_dst[LOW_RTT_TABLE_SIZE];
 
 /**
  * tcp_buf_seq_update - Sequences to update with length of frames once sent
- * @seq:	Pointer to sequence number sent to tap-side, to be updated
+ * @conn:       Pointer to connection corresponding to frame. May need update
+ * @seq:	Sequence number of the corresponding frame
  * @len:	TCP payload length
  */
 struct tcp_buf_seq_update {
-	uint32_t *seq;
+	struct tcp_tap_conn *conn;
+	uint32_t seq;
 	uint16_t len;
 };
 
@@ -1261,25 +1263,52 @@ static void tcp_flags_flush(const struct ctx *c)
 	tcp4_flags_used = 0;
 }
 
+/**
+ * tcp_revert_seq() - Revert affected conn->seq_to_tap after failed transmission
+ * @seq_update: Array with connection and sequence number data
+ * @s:          Entry corresponding to first dropped frame
+ * @e:          Entry corresponding to last dropped frame
+ */
+static void tcp_revert_seq(struct tcp_buf_seq_update *seq_update, int s, int e)
+{
+	struct tcp_tap_conn *conn;
+	uint32_t lowest_seq;
+	int i, ii;
+
+	for (i = s; i < e; i++) {
+		conn = seq_update[i].conn;
+		lowest_seq = seq_update[i].seq;
+
+		for (ii = i + 1; ii < e; ii++) {
+			if (seq_update[ii].conn != conn)
+				continue;
+			if (SEQ_GT(lowest_seq, seq_update[ii].seq))
+				lowest_seq = seq_update[ii].seq;
+		}
+
+		if (SEQ_GT(conn->seq_to_tap, lowest_seq))
+			conn->seq_to_tap = lowest_seq;
+	}
+}
+
 /**
  * tcp_payload_flush() - Send out buffers for segments with data
  * @c:		Execution context
  */
 static void tcp_payload_flush(const struct ctx *c)
 {
-	unsigned i;
 	size_t m;
 
 	m = tap_send_frames(c, &tcp6_l2_iov[0][0], TCP_NUM_IOVS,
 			    tcp6_payload_used);
-	for (i = 0; i < m; i++)
-		*tcp6_seq_update[i].seq += tcp6_seq_update[i].len;
+	if (m != tcp6_payload_used)
+		tcp_revert_seq(tcp6_seq_update, m, tcp6_payload_used);
 	tcp6_payload_used = 0;
 
 	m = tap_send_frames(c, &tcp4_l2_iov[0][0], TCP_NUM_IOVS,
 			    tcp4_payload_used);
-	for (i = 0; i < m; i++)
-		*tcp4_seq_update[i].seq += tcp4_seq_update[i].len;
+	if (m != tcp4_payload_used)
+		tcp_revert_seq(tcp4_seq_update, m, tcp4_payload_used);
 	tcp4_payload_used = 0;
 }
 
@@ -2129,10 +2158,11 @@ static int tcp_sock_consume(const struct tcp_tap_conn *conn, uint32_t ack_seq)
 static void tcp_data_to_tap(const struct ctx *c, struct tcp_tap_conn *conn,
 			    ssize_t dlen, int no_csum, uint32_t seq)
 {
-	uint32_t *seq_update = &conn->seq_to_tap;
 	struct iovec *iov;
 	size_t l4len;
 
+	conn->seq_to_tap = seq;
+
 	if (CONN_V4(conn)) {
 		struct iovec *iov_prev = tcp4_l2_iov[tcp4_payload_used - 1];
 		const uint16_t *check = NULL;
@@ -2142,7 +2172,8 @@ static void tcp_data_to_tap(const struct ctx *c, struct tcp_tap_conn *conn,
 			check = &iph->check;
 		}
 
-		tcp4_seq_update[tcp4_payload_used].seq = seq_update;
+		tcp4_seq_update[tcp4_payload_used].conn = conn;
+		tcp4_seq_update[tcp4_payload_used].seq = seq;
 		tcp4_seq_update[tcp4_payload_used].len = dlen;
 
 		iov = tcp4_l2_iov[tcp4_payload_used++];
@@ -2151,7 +2182,8 @@ static void tcp_data_to_tap(const struct ctx *c, struct tcp_tap_conn *conn,
 		if (tcp4_payload_used > TCP_FRAMES_MEM - 1)
 			tcp_payload_flush(c);
 	} else if (CONN_V6(conn)) {
-		tcp6_seq_update[tcp6_payload_used].seq = seq_update;
+		tcp6_seq_update[tcp6_payload_used].conn = conn;
+		tcp6_seq_update[tcp6_payload_used].seq = seq;
 		tcp6_seq_update[tcp6_payload_used].len = dlen;
 
 		iov = tcp6_l2_iov[tcp6_payload_used++];
-- 
2.42.0


^ permalink raw reply related	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2024-05-13  1:32 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2024-05-09  3:00 [PATCH] tcp: move seq_to_tap update to when frame is queued Jon Maloy
2024-05-10 16:40 ` Stefano Brivio
2024-05-10 19:40   ` Jon Maloy
2024-05-13  1:32     ` David Gibson
2024-05-13  1:03   ` David Gibson

Code repositories for project(s) associated with this public inbox

	https://passt.top/passt

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for IMAP folder(s).