* [PATCH] flow: fix podman issue #25959
From: Laurent Vivier @ 2025-04-30 16:05 UTC
To: passt-dev; +Cc: Laurent Vivier
While running Pi-hole with Podman, traffic can trigger the following
assert:
ASSERTION FAILED in flow_alloc (flow.c:521): flow->f.state == FLOW_STATE_FREE
Backtrace shows that this happens in flow_defer_handler():
#4 0x00005610d6f5b481 flow_alloc (passt + 0xb481)
#5 0x00005610d6f74f86 udp_flow_from_sock (passt + 0x24f86)
#6 0x00005610d6f737c3 udp_sock_fwd (passt + 0x237c3)
#7 0x00005610d6f74c07 udp_flush_flow (passt + 0x24c07)
#8 0x00005610d6f752c2 udp_flow_defer (passt + 0x252c2)
#9 0x00005610d6f5bce1 flow_defer_handler (passt + 0xbce1)
We end up trying to allocate a new flow from inside the loop that frees them.
Inside the loop, free_head points to the first free flow entry in the
current cluster. But if we allocate a new entry during the loop,
free_head is not updated and can now point to the entry we have just
allocated, which is no longer free.
We can fix the problem by splitting the loop in two parts:
- a first pass that runs the deferred handlers, which may close some
flows and allocate new flow entries,
- a second pass that frees the entries closed in the first pass and
aggregates the free entries, merging consecutive free clusters.
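To see the hazard outside the passt internals, here is a minimal,
self-contained C sketch of the same pattern (a toy pool with
hypothetical names; not the actual flow table code): a scan caches a
pointer to a free entry while a per-entry handler may allocate from
the same pool.

#include <stdbool.h>
#include <stdio.h>

#define POOL_MAX 8

struct slot {
	bool free;
};

static struct slot pool[POOL_MAX];

/* Hand out the first free slot; like flow_alloc(), this may grab the
 * very slot a caller has cached as "free". */
static struct slot *slot_alloc(void)
{
	for (int i = 0; i < POOL_MAX; i++) {
		if (pool[i].free) {
			pool[i].free = false;
			return &pool[i];
		}
	}
	return NULL;
}

/* Per-entry deferred handler which, like udp_flow_defer() in the
 * backtrace above, may end up allocating a new entry. */
static void defer_handler(void)
{
	slot_alloc();
}

int main(void)
{
	struct slot *free_head = NULL;

	pool[0].free = true;	/* one single-slot free cluster */

	for (int i = 0; i < POOL_MAX; i++) {
		if (pool[i].free && !free_head)
			free_head = &pool[i];	/* cache the cluster head */

		defer_handler();	/* may steal pool[0] from under us */
	}

	/* The cached "free" head can now be an allocated slot: */
	if (free_head && !free_head->free)
		printf("stale free_head: cached slot was re-allocated\n");

	return 0;
}

Run, the sketch prints the stale-pointer message: the allocator handed
out the cached head mid-scan, which is the state that flow_alloc()'s
assertion catches in the real flow table.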
Link: https://github.com/containers/podman/issues/25959
Signed-off-by: Laurent Vivier <lvivier@redhat.com>
---
flow.c | 107 ++++++++++++++++++++++++++++++---------------------------
1 file changed, 57 insertions(+), 50 deletions(-)
diff --git a/flow.c b/flow.c
index 3c81cb42f921..00c1b2cc316f 100644
--- a/flow.c
+++ b/flow.c
@@ -788,6 +788,7 @@ void flow_defer_handler(const struct ctx *c, const struct timespec *now)
{
struct flow_free_cluster *free_head = NULL;
unsigned *last_next = &flow_first_free;
+ bool to_free[FLOW_MAX] = { 0 };
bool timer = false;
union flow *flow;
@@ -798,9 +799,44 @@ void flow_defer_handler(const struct ctx *c, const struct timespec *now)
ASSERT(!flow_new_entry); /* Incomplete flow at end of cycle */
- flow_foreach_slot(flow) {
+ /* Check which flows we might need to close first, but don't free them
+ * yet as it's not safe to do that in the middle of flow_foreach().
+ */
+ flow_foreach(flow) {
bool closed = false;
+ switch (flow->f.type) {
+ case FLOW_TYPE_NONE:
+ ASSERT(false);
+ break;
+ case FLOW_TCP:
+ closed = tcp_flow_defer(&flow->tcp);
+ break;
+ case FLOW_TCP_SPLICE:
+ closed = tcp_splice_flow_defer(&flow->tcp_splice);
+ if (!closed && timer)
+ tcp_splice_timer(c, &flow->tcp_splice);
+ break;
+ case FLOW_PING4:
+ case FLOW_PING6:
+ if (timer)
+ closed = icmp_ping_timer(c, &flow->ping, now);
+ break;
+ case FLOW_UDP:
+ closed = udp_flow_defer(c, &flow->udp, now);
+ if (!closed && timer)
+ closed = udp_flow_timer(c, &flow->udp, now);
+ break;
+ default:
+ /* Assume other flow types don't need any handling */
+ ;
+ }
+
+ to_free[FLOW_IDX(flow)] = closed;
+ }
+
+ /* Second step: actually free the flows */
+ flow_foreach_slot(flow) {
switch (flow->f.state) {
case FLOW_STATE_FREE: {
unsigned skip = flow->free.n;
@@ -833,60 +869,31 @@ void flow_defer_handler(const struct ctx *c, const struct timespec *now)
break;
case FLOW_STATE_ACTIVE:
- /* Nothing to do */
+ if (to_free[FLOW_IDX(flow)]) {
+ flow_set_state(&flow->f, FLOW_STATE_FREE);
+ memset(flow, 0, sizeof(*flow));
+
+ if (free_head) {
+ /* Add slot to current free cluster */
+ ASSERT(FLOW_IDX(flow) ==
+ FLOW_IDX(free_head) + free_head->n);
+ free_head->n++;
+ flow->free.n = flow->free.next = 0;
+ } else {
+ /* Create new free cluster */
+ free_head = &flow->free;
+ free_head->n = 1;
+ *last_next = FLOW_IDX(flow);
+ last_next = &free_head->next;
+ }
+ } else {
+ free_head = NULL;
+ }
break;
default:
ASSERT(false);
}
-
- switch (flow->f.type) {
- case FLOW_TYPE_NONE:
- ASSERT(false);
- break;
- case FLOW_TCP:
- closed = tcp_flow_defer(&flow->tcp);
- break;
- case FLOW_TCP_SPLICE:
- closed = tcp_splice_flow_defer(&flow->tcp_splice);
- if (!closed && timer)
- tcp_splice_timer(c, &flow->tcp_splice);
- break;
- case FLOW_PING4:
- case FLOW_PING6:
- if (timer)
- closed = icmp_ping_timer(c, &flow->ping, now);
- break;
- case FLOW_UDP:
- closed = udp_flow_defer(c, &flow->udp, now);
- if (!closed && timer)
- closed = udp_flow_timer(c, &flow->udp, now);
- break;
- default:
- /* Assume other flow types don't need any handling */
- ;
- }
-
- if (closed) {
- flow_set_state(&flow->f, FLOW_STATE_FREE);
- memset(flow, 0, sizeof(*flow));
-
- if (free_head) {
- /* Add slot to current free cluster */
- ASSERT(FLOW_IDX(flow) ==
- FLOW_IDX(free_head) + free_head->n);
- free_head->n++;
- flow->free.n = flow->free.next = 0;
- } else {
- /* Create new free cluster */
- free_head = &flow->free;
- free_head->n = 1;
- *last_next = FLOW_IDX(flow);
- last_next = &free_head->next;
- }
- } else {
- free_head = NULL;
- }
}
*last_next = FLOW_MAX;
--
@@ -788,6 +788,7 @@ void flow_defer_handler(const struct ctx *c, const struct timespec *now)
{
struct flow_free_cluster *free_head = NULL;
unsigned *last_next = &flow_first_free;
+ bool to_free[FLOW_MAX] = { 0 };
bool timer = false;
union flow *flow;
@@ -798,9 +799,44 @@ void flow_defer_handler(const struct ctx *c, const struct timespec *now)
ASSERT(!flow_new_entry); /* Incomplete flow at end of cycle */
- flow_foreach_slot(flow) {
+ /* Check which flows we might need to close first, but don't free them
+ * yet as it's not safe to do that in the middle of flow_foreach().
+ */
+ flow_foreach(flow) {
bool closed = false;
+ switch (flow->f.type) {
+ case FLOW_TYPE_NONE:
+ ASSERT(false);
+ break;
+ case FLOW_TCP:
+ closed = tcp_flow_defer(&flow->tcp);
+ break;
+ case FLOW_TCP_SPLICE:
+ closed = tcp_splice_flow_defer(&flow->tcp_splice);
+ if (!closed && timer)
+ tcp_splice_timer(c, &flow->tcp_splice);
+ break;
+ case FLOW_PING4:
+ case FLOW_PING6:
+ if (timer)
+ closed = icmp_ping_timer(c, &flow->ping, now);
+ break;
+ case FLOW_UDP:
+ closed = udp_flow_defer(c, &flow->udp, now);
+ if (!closed && timer)
+ closed = udp_flow_timer(c, &flow->udp, now);
+ break;
+ default:
+ /* Assume other flow types don't need any handling */
+ ;
+ }
+
+ to_free[FLOW_IDX(flow)] = closed;
+ }
+
+ /* Second step: actually free the flows */
+ flow_foreach_slot(flow) {
switch (flow->f.state) {
case FLOW_STATE_FREE: {
unsigned skip = flow->free.n;
@@ -833,60 +869,31 @@ void flow_defer_handler(const struct ctx *c, const struct timespec *now)
break;
case FLOW_STATE_ACTIVE:
- /* Nothing to do */
+ if (to_free[FLOW_IDX(flow)]) {
+ flow_set_state(&flow->f, FLOW_STATE_FREE);
+ memset(flow, 0, sizeof(*flow));
+
+ if (free_head) {
+ /* Add slot to current free cluster */
+ ASSERT(FLOW_IDX(flow) ==
+ FLOW_IDX(free_head) + free_head->n);
+ free_head->n++;
+ flow->free.n = flow->free.next = 0;
+ } else {
+ /* Create new free cluster */
+ free_head = &flow->free;
+ free_head->n = 1;
+ *last_next = FLOW_IDX(flow);
+ last_next = &free_head->next;
+ }
+ } else {
+ free_head = NULL;
+ }
break;
default:
ASSERT(false);
}
-
- switch (flow->f.type) {
- case FLOW_TYPE_NONE:
- ASSERT(false);
- break;
- case FLOW_TCP:
- closed = tcp_flow_defer(&flow->tcp);
- break;
- case FLOW_TCP_SPLICE:
- closed = tcp_splice_flow_defer(&flow->tcp_splice);
- if (!closed && timer)
- tcp_splice_timer(c, &flow->tcp_splice);
- break;
- case FLOW_PING4:
- case FLOW_PING6:
- if (timer)
- closed = icmp_ping_timer(c, &flow->ping, now);
- break;
- case FLOW_UDP:
- closed = udp_flow_defer(c, &flow->udp, now);
- if (!closed && timer)
- closed = udp_flow_timer(c, &flow->udp, now);
- break;
- default:
- /* Assume other flow types don't need any handling */
- ;
- }
-
- if (closed) {
- flow_set_state(&flow->f, FLOW_STATE_FREE);
- memset(flow, 0, sizeof(*flow));
-
- if (free_head) {
- /* Add slot to current free cluster */
- ASSERT(FLOW_IDX(flow) ==
- FLOW_IDX(free_head) + free_head->n);
- free_head->n++;
- flow->free.n = flow->free.next = 0;
- } else {
- /* Create new free cluster */
- free_head = &flow->free;
- free_head->n = 1;
- *last_next = FLOW_IDX(flow);
- last_next = &free_head->next;
- }
- } else {
- free_head = NULL;
- }
}
*last_next = FLOW_MAX;
--
2.49.0
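The applied patch is essentially a mark-then-sweep split. Below is a
self-contained illustration of that discipline on the same toy pool as
above (again with hypothetical names; handler_says_close() stands in
for tcp_flow_defer() and friends, and this is not the actual passt
code). The first pass only records which entries should close, so any
allocation a handler triggers is harmless; the second pass frees
entries and rebuilds free clusters without ever allocating, so the
cached cluster head cannot go stale.

#include <stdbool.h>
#include <stdio.h>

#define POOL_MAX 8

struct slot {
	bool in_use;
};

static struct slot pool[POOL_MAX];

/* Stand-in for the type-specific handlers; pretend the even-indexed
 * entries asked to be closed. */
static bool handler_says_close(int i)
{
	return i % 2 == 0;
}

static void defer_two_pass(void)
{
	bool to_free[POOL_MAX] = { false };
	struct slot *free_head = NULL;

	/* Pass 1: run handlers and record what to close.  Handlers may
	 * allocate here; nothing is cached yet, so that is harmless. */
	for (int i = 0; i < POOL_MAX; i++) {
		if (pool[i].in_use)
			to_free[i] = handler_says_close(i);
	}

	/* Pass 2: free the marked slots and rebuild free clusters.  No
	 * allocation can happen here, so free_head cannot go stale. */
	for (int i = 0; i < POOL_MAX; i++) {
		if (to_free[i]) {
			pool[i].in_use = false;
			if (!free_head)
				free_head = &pool[i]; /* new cluster head */
		} else if (pool[i].in_use) {
			free_head = NULL; /* a live slot ends the cluster */
		}
	}
}

int main(void)
{
	for (int i = 0; i < POOL_MAX; i++)
		pool[i].in_use = true;

	defer_two_pass();

	for (int i = 0; i < POOL_MAX; i++)
		printf("slot %d: %s\n", i, pool[i].in_use ? "in use" : "free");

	return 0;
}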
* Re: [PATCH] flow: fix podman issue #25959
From: Stefano Brivio @ 2025-05-02 17:26 UTC
To: Laurent Vivier; +Cc: passt-dev, Martin Rijntjes
On Wed, 30 Apr 2025 18:05:25 +0200
Laurent Vivier <lvivier@redhat.com> wrote:
> While running Pi-hole with Podman, traffic can trigger the following
> assert:
>
> ASSERTION FAILED in flow_alloc (flow.c:521): flow->f.state == FLOW_STATE_FREE
>
> Backtrace shows that this happens in flow_defer_handler():
>
> #4 0x00005610d6f5b481 flow_alloc (passt + 0xb481)
> #5 0x00005610d6f74f86 udp_flow_from_sock (passt + 0x24f86)
> #6 0x00005610d6f737c3 udp_sock_fwd (passt + 0x237c3)
> #7 0x00005610d6f74c07 udp_flush_flow (passt + 0x24c07)
> #8 0x00005610d6f752c2 udp_flow_defer (passt + 0x252c2)
> #9 0x00005610d6f5bce1 flow_defer_handler (passt + 0xbce1)
>
> We end up trying to allocate a new flow from inside the loop that frees them.
>
> Inside the loop, free_head points to the first free flow entry in the
> current cluster. But if we allocate a new entry during the loop,
> free_head is not updated and can now point to the entry we have just
> allocated, which is no longer free.
>
> We can fix the problem by splitting the loop in two parts:
> - a first pass that runs the deferred handlers, which may close some
> flows and allocate new flow entries,
> - a second pass that frees the entries closed in the first pass and
> aggregates the free entries, merging consecutive free clusters.
>
> Link: https://github.com/containers/podman/issues/25959
> Signed-off-by: Laurent Vivier <lvivier@redhat.com>
Applied, thanks!
--
Stefano