From: Laurent Vivier <lvivier@redhat.com>
To: passt-dev@passt.top
Cc: Laurent Vivier <lvivier@redhat.com>
Subject: [PATCH] flow: fix podman issue #25959
Date: Wed, 30 Apr 2025 18:05:25 +0200 [thread overview]
Message-ID: <20250430160525.3642997-1-lvivier@redhat.com> (raw)
While running piHole using podman, traffic can trigger the following
assert:
ASSSERTION FAILED in flow_alloc (flow.c:521): flow->f.state == FLOW_STATE_FREE
Backtrace shows that this happens in flow_defer_handler():
#4 0x00005610d6f5b481 flow_alloc (passt + 0xb481)
#5 0x00005610d6f74f86 udp_flow_from_sock (passt + 0x24f86)
#6 0x00005610d6f737c3 udp_sock_fwd (passt + 0x237c3)
#7 0x00005610d6f74c07 udp_flush_flow (passt + 0x24c07)
#8 0x00005610d6f752c2 udp_flow_defer (passt + 0x252c2)
#9 0x00005610d6f5bce1 flow_defer_handler (passt + 0xbce1)
We are trying to allocate a new flow inside the loop freeing them.
Inside the loop free_head points to the first free flow entry in the
current cluster. But if we allocate a new entry during the loop,
free_head is not updated and can point now to the entry we have just
allocated.
We can fix the problem by spliting the loop in two parts:
- first part where we can close some of them and allocate some new
flow entries,
- second part where we free the entries closed in the previous loop
and we aggregate the free entries to merge consecutive the clusters.
Link: https://github.com/containers/podman/issues/25959
Signed-off-by: Laurent Vivier <lvivier@redhat.com>
---
flow.c | 107 ++++++++++++++++++++++++++++++---------------------------
1 file changed, 57 insertions(+), 50 deletions(-)
diff --git a/flow.c b/flow.c
index 3c81cb42f921..00c1b2cc316f 100644
--- a/flow.c
+++ b/flow.c
@@ -788,6 +788,7 @@ void flow_defer_handler(const struct ctx *c, const struct timespec *now)
{
struct flow_free_cluster *free_head = NULL;
unsigned *last_next = &flow_first_free;
+ bool to_free[FLOW_MAX] = { 0 };
bool timer = false;
union flow *flow;
@@ -798,9 +799,44 @@ void flow_defer_handler(const struct ctx *c, const struct timespec *now)
ASSERT(!flow_new_entry); /* Incomplete flow at end of cycle */
- flow_foreach_slot(flow) {
+ /* Check which flows we might need to close first, but don't free them
+ * yet as it's not safe to do that in the middle of flow_foreach().
+ */
+ flow_foreach(flow) {
bool closed = false;
+ switch (flow->f.type) {
+ case FLOW_TYPE_NONE:
+ ASSERT(false);
+ break;
+ case FLOW_TCP:
+ closed = tcp_flow_defer(&flow->tcp);
+ break;
+ case FLOW_TCP_SPLICE:
+ closed = tcp_splice_flow_defer(&flow->tcp_splice);
+ if (!closed && timer)
+ tcp_splice_timer(c, &flow->tcp_splice);
+ break;
+ case FLOW_PING4:
+ case FLOW_PING6:
+ if (timer)
+ closed = icmp_ping_timer(c, &flow->ping, now);
+ break;
+ case FLOW_UDP:
+ closed = udp_flow_defer(c, &flow->udp, now);
+ if (!closed && timer)
+ closed = udp_flow_timer(c, &flow->udp, now);
+ break;
+ default:
+ /* Assume other flow types don't need any handling */
+ ;
+ }
+
+ to_free[FLOW_IDX(flow)] = closed;
+ }
+
+ /* Second step: actually free the flows */
+ flow_foreach_slot(flow) {
switch (flow->f.state) {
case FLOW_STATE_FREE: {
unsigned skip = flow->free.n;
@@ -833,60 +869,31 @@ void flow_defer_handler(const struct ctx *c, const struct timespec *now)
break;
case FLOW_STATE_ACTIVE:
- /* Nothing to do */
+ if (to_free[FLOW_IDX(flow)]) {
+ flow_set_state(&flow->f, FLOW_STATE_FREE);
+ memset(flow, 0, sizeof(*flow));
+
+ if (free_head) {
+ /* Add slot to current free cluster */
+ ASSERT(FLOW_IDX(flow) ==
+ FLOW_IDX(free_head) + free_head->n);
+ free_head->n++;
+ flow->free.n = flow->free.next = 0;
+ } else {
+ /* Create new free cluster */
+ free_head = &flow->free;
+ free_head->n = 1;
+ *last_next = FLOW_IDX(flow);
+ last_next = &free_head->next;
+ }
+ } else {
+ free_head = NULL;
+ }
break;
default:
ASSERT(false);
}
-
- switch (flow->f.type) {
- case FLOW_TYPE_NONE:
- ASSERT(false);
- break;
- case FLOW_TCP:
- closed = tcp_flow_defer(&flow->tcp);
- break;
- case FLOW_TCP_SPLICE:
- closed = tcp_splice_flow_defer(&flow->tcp_splice);
- if (!closed && timer)
- tcp_splice_timer(c, &flow->tcp_splice);
- break;
- case FLOW_PING4:
- case FLOW_PING6:
- if (timer)
- closed = icmp_ping_timer(c, &flow->ping, now);
- break;
- case FLOW_UDP:
- closed = udp_flow_defer(c, &flow->udp, now);
- if (!closed && timer)
- closed = udp_flow_timer(c, &flow->udp, now);
- break;
- default:
- /* Assume other flow types don't need any handling */
- ;
- }
-
- if (closed) {
- flow_set_state(&flow->f, FLOW_STATE_FREE);
- memset(flow, 0, sizeof(*flow));
-
- if (free_head) {
- /* Add slot to current free cluster */
- ASSERT(FLOW_IDX(flow) ==
- FLOW_IDX(free_head) + free_head->n);
- free_head->n++;
- flow->free.n = flow->free.next = 0;
- } else {
- /* Create new free cluster */
- free_head = &flow->free;
- free_head->n = 1;
- *last_next = FLOW_IDX(flow);
- last_next = &free_head->next;
- }
- } else {
- free_head = NULL;
- }
}
*last_next = FLOW_MAX;
--
@@ -788,6 +788,7 @@ void flow_defer_handler(const struct ctx *c, const struct timespec *now)
{
struct flow_free_cluster *free_head = NULL;
unsigned *last_next = &flow_first_free;
+ bool to_free[FLOW_MAX] = { 0 };
bool timer = false;
union flow *flow;
@@ -798,9 +799,44 @@ void flow_defer_handler(const struct ctx *c, const struct timespec *now)
ASSERT(!flow_new_entry); /* Incomplete flow at end of cycle */
- flow_foreach_slot(flow) {
+ /* Check which flows we might need to close first, but don't free them
+ * yet as it's not safe to do that in the middle of flow_foreach().
+ */
+ flow_foreach(flow) {
bool closed = false;
+ switch (flow->f.type) {
+ case FLOW_TYPE_NONE:
+ ASSERT(false);
+ break;
+ case FLOW_TCP:
+ closed = tcp_flow_defer(&flow->tcp);
+ break;
+ case FLOW_TCP_SPLICE:
+ closed = tcp_splice_flow_defer(&flow->tcp_splice);
+ if (!closed && timer)
+ tcp_splice_timer(c, &flow->tcp_splice);
+ break;
+ case FLOW_PING4:
+ case FLOW_PING6:
+ if (timer)
+ closed = icmp_ping_timer(c, &flow->ping, now);
+ break;
+ case FLOW_UDP:
+ closed = udp_flow_defer(c, &flow->udp, now);
+ if (!closed && timer)
+ closed = udp_flow_timer(c, &flow->udp, now);
+ break;
+ default:
+ /* Assume other flow types don't need any handling */
+ ;
+ }
+
+ to_free[FLOW_IDX(flow)] = closed;
+ }
+
+ /* Second step: actually free the flows */
+ flow_foreach_slot(flow) {
switch (flow->f.state) {
case FLOW_STATE_FREE: {
unsigned skip = flow->free.n;
@@ -833,60 +869,31 @@ void flow_defer_handler(const struct ctx *c, const struct timespec *now)
break;
case FLOW_STATE_ACTIVE:
- /* Nothing to do */
+ if (to_free[FLOW_IDX(flow)]) {
+ flow_set_state(&flow->f, FLOW_STATE_FREE);
+ memset(flow, 0, sizeof(*flow));
+
+ if (free_head) {
+ /* Add slot to current free cluster */
+ ASSERT(FLOW_IDX(flow) ==
+ FLOW_IDX(free_head) + free_head->n);
+ free_head->n++;
+ flow->free.n = flow->free.next = 0;
+ } else {
+ /* Create new free cluster */
+ free_head = &flow->free;
+ free_head->n = 1;
+ *last_next = FLOW_IDX(flow);
+ last_next = &free_head->next;
+ }
+ } else {
+ free_head = NULL;
+ }
break;
default:
ASSERT(false);
}
-
- switch (flow->f.type) {
- case FLOW_TYPE_NONE:
- ASSERT(false);
- break;
- case FLOW_TCP:
- closed = tcp_flow_defer(&flow->tcp);
- break;
- case FLOW_TCP_SPLICE:
- closed = tcp_splice_flow_defer(&flow->tcp_splice);
- if (!closed && timer)
- tcp_splice_timer(c, &flow->tcp_splice);
- break;
- case FLOW_PING4:
- case FLOW_PING6:
- if (timer)
- closed = icmp_ping_timer(c, &flow->ping, now);
- break;
- case FLOW_UDP:
- closed = udp_flow_defer(c, &flow->udp, now);
- if (!closed && timer)
- closed = udp_flow_timer(c, &flow->udp, now);
- break;
- default:
- /* Assume other flow types don't need any handling */
- ;
- }
-
- if (closed) {
- flow_set_state(&flow->f, FLOW_STATE_FREE);
- memset(flow, 0, sizeof(*flow));
-
- if (free_head) {
- /* Add slot to current free cluster */
- ASSERT(FLOW_IDX(flow) ==
- FLOW_IDX(free_head) + free_head->n);
- free_head->n++;
- flow->free.n = flow->free.next = 0;
- } else {
- /* Create new free cluster */
- free_head = &flow->free;
- free_head->n = 1;
- *last_next = FLOW_IDX(flow);
- last_next = &free_head->next;
- }
- } else {
- free_head = NULL;
- }
}
*last_next = FLOW_MAX;
--
2.49.0
next reply other threads:[~2025-04-30 16:05 UTC|newest]
Thread overview: 2+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-04-30 16:05 Laurent Vivier [this message]
2025-05-02 17:26 ` [PATCH] flow: fix podman issue #25959 Stefano Brivio
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20250430160525.3642997-1-lvivier@redhat.com \
--to=lvivier@redhat.com \
--cc=passt-dev@passt.top \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
Code repositories for project(s) associated with this public inbox
https://passt.top/passt
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for IMAP folder(s).