public inbox for passt-dev@passt.top
 help / color / mirror / code / Atom feed
* [PATCH] flow: fix podman issue #25959
@ 2025-04-30 16:05 Laurent Vivier
  2025-05-02 17:26 ` Stefano Brivio
  0 siblings, 1 reply; 2+ messages in thread
From: Laurent Vivier @ 2025-04-30 16:05 UTC (permalink / raw)
  To: passt-dev; +Cc: Laurent Vivier

While running piHole using podman, traffic can trigger the following
assert:

ASSSERTION FAILED in flow_alloc (flow.c:521): flow->f.state == FLOW_STATE_FREE

Backtrace shows that this happens in flow_defer_handler():

    #4  0x00005610d6f5b481 flow_alloc (passt + 0xb481)
    #5  0x00005610d6f74f86 udp_flow_from_sock (passt + 0x24f86)
    #6  0x00005610d6f737c3 udp_sock_fwd (passt + 0x237c3)
    #7  0x00005610d6f74c07 udp_flush_flow (passt + 0x24c07)
    #8  0x00005610d6f752c2 udp_flow_defer (passt + 0x252c2)
    #9  0x00005610d6f5bce1 flow_defer_handler (passt + 0xbce1)

We are trying to allocate a new flow inside the loop freeing them.

Inside the loop free_head points to the first free flow entry in the
current cluster. But if we allocate a new entry during the loop,
free_head is not updated and can point now to the entry we have just
allocated.

We can fix the problem by spliting the loop in two parts:
- first part where we can close some of them and allocate some new
  flow entries,
- second part where we free the entries closed in the previous loop
  and we aggregate the free entries to merge consecutive the clusters.

Link: https://github.com/containers/podman/issues/25959
Signed-off-by: Laurent Vivier <lvivier@redhat.com>
---
 flow.c | 107 ++++++++++++++++++++++++++++++---------------------------
 1 file changed, 57 insertions(+), 50 deletions(-)

diff --git a/flow.c b/flow.c
index 3c81cb42f921..00c1b2cc316f 100644
--- a/flow.c
+++ b/flow.c
@@ -788,6 +788,7 @@ void flow_defer_handler(const struct ctx *c, const struct timespec *now)
 {
 	struct flow_free_cluster *free_head = NULL;
 	unsigned *last_next = &flow_first_free;
+	bool to_free[FLOW_MAX] = { 0 };
 	bool timer = false;
 	union flow *flow;
 
@@ -798,9 +799,44 @@ void flow_defer_handler(const struct ctx *c, const struct timespec *now)
 
 	ASSERT(!flow_new_entry); /* Incomplete flow at end of cycle */
 
-	flow_foreach_slot(flow) {
+	/* Check which flows we might need to close first, but don't free them
+	 * yet as it's not safe to do that in the middle of flow_foreach().
+	 */
+	flow_foreach(flow) {
 		bool closed = false;
 
+		switch (flow->f.type) {
+		case FLOW_TYPE_NONE:
+			ASSERT(false);
+			break;
+		case FLOW_TCP:
+			closed = tcp_flow_defer(&flow->tcp);
+			break;
+		case FLOW_TCP_SPLICE:
+			closed = tcp_splice_flow_defer(&flow->tcp_splice);
+			if (!closed && timer)
+				tcp_splice_timer(c, &flow->tcp_splice);
+			break;
+		case FLOW_PING4:
+		case FLOW_PING6:
+			if (timer)
+				closed = icmp_ping_timer(c, &flow->ping, now);
+			break;
+		case FLOW_UDP:
+			closed = udp_flow_defer(c, &flow->udp, now);
+			if (!closed && timer)
+				closed = udp_flow_timer(c, &flow->udp, now);
+			break;
+		default:
+			/* Assume other flow types don't need any handling */
+			;
+		}
+
+		to_free[FLOW_IDX(flow)] = closed;
+	}
+
+	/* Second step: actually free the flows */
+	flow_foreach_slot(flow) {
 		switch (flow->f.state) {
 		case FLOW_STATE_FREE: {
 			unsigned skip = flow->free.n;
@@ -833,60 +869,31 @@ void flow_defer_handler(const struct ctx *c, const struct timespec *now)
 			break;
 
 		case FLOW_STATE_ACTIVE:
-			/* Nothing to do */
+			if (to_free[FLOW_IDX(flow)]) {
+				flow_set_state(&flow->f, FLOW_STATE_FREE);
+				memset(flow, 0, sizeof(*flow));
+
+				if (free_head) {
+					/* Add slot to current free cluster */
+					ASSERT(FLOW_IDX(flow) ==
+					    FLOW_IDX(free_head) + free_head->n);
+					free_head->n++;
+					flow->free.n = flow->free.next = 0;
+				} else {
+					/* Create new free cluster */
+					free_head = &flow->free;
+					free_head->n = 1;
+					*last_next = FLOW_IDX(flow);
+					last_next = &free_head->next;
+				}
+			} else {
+				free_head = NULL;
+			}
 			break;
 
 		default:
 			ASSERT(false);
 		}
-
-		switch (flow->f.type) {
-		case FLOW_TYPE_NONE:
-			ASSERT(false);
-			break;
-		case FLOW_TCP:
-			closed = tcp_flow_defer(&flow->tcp);
-			break;
-		case FLOW_TCP_SPLICE:
-			closed = tcp_splice_flow_defer(&flow->tcp_splice);
-			if (!closed && timer)
-				tcp_splice_timer(c, &flow->tcp_splice);
-			break;
-		case FLOW_PING4:
-		case FLOW_PING6:
-			if (timer)
-				closed = icmp_ping_timer(c, &flow->ping, now);
-			break;
-		case FLOW_UDP:
-			closed = udp_flow_defer(c, &flow->udp, now);
-			if (!closed && timer)
-				closed = udp_flow_timer(c, &flow->udp, now);
-			break;
-		default:
-			/* Assume other flow types don't need any handling */
-			;
-		}
-
-		if (closed) {
-			flow_set_state(&flow->f, FLOW_STATE_FREE);
-			memset(flow, 0, sizeof(*flow));
-
-			if (free_head) {
-				/* Add slot to current free cluster */
-				ASSERT(FLOW_IDX(flow) ==
-				       FLOW_IDX(free_head) + free_head->n);
-				free_head->n++;
-				flow->free.n = flow->free.next = 0;
-			} else {
-				/* Create new free cluster */
-				free_head = &flow->free;
-				free_head->n = 1;
-				*last_next = FLOW_IDX(flow);
-				last_next = &free_head->next;
-			}
-		} else {
-			free_head = NULL;
-		}
 	}
 
 	*last_next = FLOW_MAX;
-- 
@@ -788,6 +788,7 @@ void flow_defer_handler(const struct ctx *c, const struct timespec *now)
 {
 	struct flow_free_cluster *free_head = NULL;
 	unsigned *last_next = &flow_first_free;
+	bool to_free[FLOW_MAX] = { 0 };
 	bool timer = false;
 	union flow *flow;
 
@@ -798,9 +799,44 @@ void flow_defer_handler(const struct ctx *c, const struct timespec *now)
 
 	ASSERT(!flow_new_entry); /* Incomplete flow at end of cycle */
 
-	flow_foreach_slot(flow) {
+	/* Check which flows we might need to close first, but don't free them
+	 * yet as it's not safe to do that in the middle of flow_foreach().
+	 */
+	flow_foreach(flow) {
 		bool closed = false;
 
+		switch (flow->f.type) {
+		case FLOW_TYPE_NONE:
+			ASSERT(false);
+			break;
+		case FLOW_TCP:
+			closed = tcp_flow_defer(&flow->tcp);
+			break;
+		case FLOW_TCP_SPLICE:
+			closed = tcp_splice_flow_defer(&flow->tcp_splice);
+			if (!closed && timer)
+				tcp_splice_timer(c, &flow->tcp_splice);
+			break;
+		case FLOW_PING4:
+		case FLOW_PING6:
+			if (timer)
+				closed = icmp_ping_timer(c, &flow->ping, now);
+			break;
+		case FLOW_UDP:
+			closed = udp_flow_defer(c, &flow->udp, now);
+			if (!closed && timer)
+				closed = udp_flow_timer(c, &flow->udp, now);
+			break;
+		default:
+			/* Assume other flow types don't need any handling */
+			;
+		}
+
+		to_free[FLOW_IDX(flow)] = closed;
+	}
+
+	/* Second step: actually free the flows */
+	flow_foreach_slot(flow) {
 		switch (flow->f.state) {
 		case FLOW_STATE_FREE: {
 			unsigned skip = flow->free.n;
@@ -833,60 +869,31 @@ void flow_defer_handler(const struct ctx *c, const struct timespec *now)
 			break;
 
 		case FLOW_STATE_ACTIVE:
-			/* Nothing to do */
+			if (to_free[FLOW_IDX(flow)]) {
+				flow_set_state(&flow->f, FLOW_STATE_FREE);
+				memset(flow, 0, sizeof(*flow));
+
+				if (free_head) {
+					/* Add slot to current free cluster */
+					ASSERT(FLOW_IDX(flow) ==
+					    FLOW_IDX(free_head) + free_head->n);
+					free_head->n++;
+					flow->free.n = flow->free.next = 0;
+				} else {
+					/* Create new free cluster */
+					free_head = &flow->free;
+					free_head->n = 1;
+					*last_next = FLOW_IDX(flow);
+					last_next = &free_head->next;
+				}
+			} else {
+				free_head = NULL;
+			}
 			break;
 
 		default:
 			ASSERT(false);
 		}
-
-		switch (flow->f.type) {
-		case FLOW_TYPE_NONE:
-			ASSERT(false);
-			break;
-		case FLOW_TCP:
-			closed = tcp_flow_defer(&flow->tcp);
-			break;
-		case FLOW_TCP_SPLICE:
-			closed = tcp_splice_flow_defer(&flow->tcp_splice);
-			if (!closed && timer)
-				tcp_splice_timer(c, &flow->tcp_splice);
-			break;
-		case FLOW_PING4:
-		case FLOW_PING6:
-			if (timer)
-				closed = icmp_ping_timer(c, &flow->ping, now);
-			break;
-		case FLOW_UDP:
-			closed = udp_flow_defer(c, &flow->udp, now);
-			if (!closed && timer)
-				closed = udp_flow_timer(c, &flow->udp, now);
-			break;
-		default:
-			/* Assume other flow types don't need any handling */
-			;
-		}
-
-		if (closed) {
-			flow_set_state(&flow->f, FLOW_STATE_FREE);
-			memset(flow, 0, sizeof(*flow));
-
-			if (free_head) {
-				/* Add slot to current free cluster */
-				ASSERT(FLOW_IDX(flow) ==
-				       FLOW_IDX(free_head) + free_head->n);
-				free_head->n++;
-				flow->free.n = flow->free.next = 0;
-			} else {
-				/* Create new free cluster */
-				free_head = &flow->free;
-				free_head->n = 1;
-				*last_next = FLOW_IDX(flow);
-				last_next = &free_head->next;
-			}
-		} else {
-			free_head = NULL;
-		}
 	}
 
 	*last_next = FLOW_MAX;
-- 
2.49.0


^ permalink raw reply related	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2025-05-02 17:27 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2025-04-30 16:05 [PATCH] flow: fix podman issue #25959 Laurent Vivier
2025-05-02 17:26 ` Stefano Brivio

Code repositories for project(s) associated with this public inbox

	https://passt.top/passt

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for IMAP folder(s).