On Tue, Jun 16, 2026 at 07:10:50PM +0200, Laurent Vivier wrote: > tcp_defer_handler() uses c->tcp.timer_run, c->tcp.keepalive_run, and > c->tcp.inactivity_run as global timer gates shared across all callers. > In multiqueue mode, multiple qpair workers will call tcp_defer_handler() > concurrently, causing races on these fields. It also unconditionally > runs tcp_payload_flush(), tcp_sock_refill_init(), and tcp_splice_refill() > which operate on global state. > > Add timer_run, keepalive_run, and inactivity_run as parameters so each > caller provides its own per-qpair timer state. Remove the now-unused > fields from struct tcp_ctx and drop timer_init() which only initialised > c->tcp.timer_run. > > Guard tcp_payload_flush() and socket pool refills with qpair == 0 since > they operate on global buffers shared across all queue pairs. > > Signed-off-by: Laurent Vivier > --- > passt.c | 37 +++++++++++++++++-------------------- > tcp.c | 51 +++++++++++++++++++++++++++++++-------------------- > tcp.h | 9 ++------- > 3 files changed, 50 insertions(+), 47 deletions(-) > > diff --git a/passt.c b/passt.c > index bebc2b99f523..ca5973e17317 100644 > --- a/passt.c > +++ b/passt.c > @@ -96,16 +96,23 @@ struct passt_stats { > > /** > * post_handler() - Run periodic and deferred tasks for L4 protocol handlers > - * @c: Execution context > - * @now: Current timestamp > - * @timer_run: Last time the flow timers ran > - * @qpair: Queue pair to process > + * @c: Execution context > + * @now: Current timestamp > + * @timer_run: Last time the flow timers ran > + * @tcp_timer_run: Last time TCP timers ran > + * @keepalive_run: Last time keepalives ran > + * @inactivity_run: Last time inactivity scan ran > + * @qpair: Queue pair to process > */ > static void post_handler(struct ctx *c, const struct timespec *now, > - struct timespec *timer_run, unsigned int qpair) > + struct timespec *timer_run, > + struct timespec *tcp_timer_run, > + time_t *keepalive_run, > + time_t *inactivity_run, unsigned int qpair) This is pretty bulk, and also threads several TCP specific things through this not TCP specific code path. This seems like a place where a qpair_ctx which holds these timestamps would be useful. > { > if (!c->no_tcp) > - tcp_defer_handler(c, now, qpair); > + tcp_defer_handler(c, now, tcp_timer_run, keepalive_run, > + inactivity_run, qpair); > > flow_defer_handler(c, now, timer_run, qpair); > fwd_scan_ports_timer(c, now); > @@ -130,16 +137,6 @@ static void random_init(struct ctx *c) > srandom(seed); > } > > -/** > - * timer_init() - Set initial timestamp for timer runs to current time > - * @c: Execution context > - * @now: Current timestamp > - */ > -static void timer_init(struct ctx *c, const struct timespec *now) > -{ > - c->tcp.timer_run = *now; > -} > - > /** > * proto_update_l2_buf() - Update scatter-gather L2 buffers in protocol handlers > * @eth_d: Ethernet destination address, NULL if unchanged > @@ -221,8 +218,9 @@ static void print_stats(const struct ctx *c, const struct passt_stats *stats, > */ > static void passt_worker(void *opaque, int nfds, struct epoll_event *events) > { > + static time_t keepalive_run, inactivity_run; > static struct passt_stats stats = { 0 }; > - static struct timespec flow_timer_run; > + static struct timespec flow_timer_run, tcp_timer_run; > struct ctx *c = opaque; > struct timespec now; > int i; > @@ -306,7 +304,8 @@ static void passt_worker(void *opaque, int nfds, struct epoll_event *events) > print_stats(c, &stats, &now); > } > > - post_handler(c, &now, &flow_timer_run, QPAIR_DEFAULT); > + post_handler(c, &now, &flow_timer_run, &tcp_timer_run, > + &keepalive_run, &inactivity_run, QPAIR_DEFAULT); > > migrate_handler(c); > } > @@ -433,8 +432,6 @@ int main(int argc, char **argv) > > isolate_postfork(&c); > > - timer_init(&c, &now); > - > loop: > /* NOLINTBEGIN(bugprone-branch-clone): intervals can be the same */ > /* cppcheck-suppress [duplicateValueTernary, unmatchedSuppression] */ > diff --git a/tcp.c b/tcp.c > index f4fe866ba7c3..955012355d69 100644 > --- a/tcp.c > +++ b/tcp.c > @@ -2988,17 +2988,19 @@ int tcp_init(struct ctx *c) > /** > * tcp_keepalive() - Send keepalives for connections which need it > * @c: Execution context > + * @now: Current timestamp > + * @last_run: Last time keepalives ran, updated on run > * @qpair: Queue pair to process > */ > -static void tcp_keepalive(struct ctx *c, const struct timespec *now, > - unsigned int qpair) > +static void tcp_keepalive(const struct ctx *c, const struct timespec *now, > + time_t *last_run, unsigned int qpair) > { > union flow *flow; > > - if (now->tv_sec - c->tcp.keepalive_run < KEEPALIVE_INTERVAL) > + if (now->tv_sec - *last_run < KEEPALIVE_INTERVAL) > return; > > - c->tcp.keepalive_run = now->tv_sec; > + *last_run = now->tv_sec; > > flow_foreach_of_type(flow, FLOW_TCP) { > struct tcp_tap_conn *conn = &flow->tcp; > @@ -3021,18 +3023,20 @@ static void tcp_keepalive(struct ctx *c, const struct timespec *now, > /** > * tcp_inactivity() - Scan for and close long-inactive connections > * @c: Execution context > + * @now: Current timestamp > + * @last_run: Last time inactivity scan ran, updated on run > * @qpair: Queue pair to process > */ > -static void tcp_inactivity(struct ctx *c, const struct timespec *now, > - unsigned int qpair) > +static void tcp_inactivity(const struct ctx *c, const struct timespec *now, > + time_t *last_run, unsigned int qpair) > { > union flow *flow; > > - if (now->tv_sec - c->tcp.inactivity_run < INACTIVITY_INTERVAL) > + if (now->tv_sec - *last_run < INACTIVITY_INTERVAL) > return; > > debug("TCP inactivity scan"); > - c->tcp.inactivity_run = now->tv_sec; > + *last_run = now->tv_sec; > > flow_foreach_of_type(flow, FLOW_TCP) { > struct tcp_tap_conn *conn = &flow->tcp; > @@ -3054,27 +3058,34 @@ static void tcp_inactivity(struct ctx *c, const struct timespec *now, > > /** > * tcp_defer_handler() - Handler for TCP deferred tasks > - * @c: Execution context > - * @now: Current timestamp > - * @qpair: Queue pair to process > + * @c: Execution context > + * @now: Current timestamp > + * @timer_run: Last time TCP timers ran > + * @keepalive_run: Last time keepalives ran > + * @inactivity_run: Last time inactivity scan ran > + * @qpair: Queue pair to process > */ > /* cppcheck-suppress [constParameterPointer, unmatchedSuppression] */ > void tcp_defer_handler(struct ctx *c, const struct timespec *now, > - unsigned int qpair) > + struct timespec *timer_run, time_t *keepalive_run, > + time_t *inactivity_run, unsigned int qpair) This one is TCP specific, but it's still pretty bulky. > { > - tcp_payload_flush(c); > + if (qpair == 0) > + tcp_payload_flush(c); > > - if (timespec_diff_ms(now, &c->tcp.timer_run) < TCP_TIMER_INTERVAL) > + if (timespec_diff_ms(now, timer_run) < TCP_TIMER_INTERVAL) > return; > > - c->tcp.timer_run = *now; > + *timer_run = *now; > > - tcp_sock_refill_init(c); > - if (c->mode == MODE_PASTA) > - tcp_splice_refill(c); > + if (qpair == 0) { > + tcp_sock_refill_init(c); > + if (c->mode == MODE_PASTA) > + tcp_splice_refill(c); > + } > > - tcp_keepalive(c, now, qpair); > - tcp_inactivity(c, now, qpair); > + tcp_keepalive(c, now, keepalive_run, qpair); > + tcp_inactivity(c, now, inactivity_run, qpair); > } > > /** > diff --git a/tcp.h b/tcp.h > index 490f1b140e44..64c75ba481bd 100644 > --- a/tcp.h > +++ b/tcp.h > @@ -32,7 +32,8 @@ int tcp_listen(const struct ctx *c, uint8_t pif, unsigned rule, > const union inany_addr *addr, const char *ifname, in_port_t port); > int tcp_init(struct ctx *c); > void tcp_defer_handler(struct ctx *c, const struct timespec *now, > - unsigned int qpair); > + struct timespec *timer_run, time_t *keepalive_run, > + time_t *inactivity_run, unsigned int qpair); > > void tcp_update_l2_buf(const unsigned char *eth_d); > > @@ -42,24 +43,18 @@ extern bool peek_offset_cap; > * struct tcp_ctx - Execution context for TCP routines > * @scan_in: Port scanning state for inbound packets > * @scan_out: Port scanning state for outbound packets > - * @timer_run: Timestamp of most recent timer run > * @pipe_size: Size of pipes for spliced connections > * @rto_max: Maximum retry timeout (in s) > * @syn_retries: SYN retries using exponential backoff timeout > * @syn_linear_timeouts: SYN retries before using exponential backoff timeout > - * @keepalive_run: Time we last issued tap-side keepalives > - * @inactivity_run: Time we last scanned for inactive connections > */ > struct tcp_ctx { > struct fwd_scan scan_in; > struct fwd_scan scan_out; > - struct timespec timer_run; > size_t pipe_size; > int rto_max; > uint8_t syn_retries; > uint8_t syn_linear_timeouts; > - time_t keepalive_run; > - time_t inactivity_run; > }; > > #endif /* TCP_H */ > -- > 2.54.0 > -- David Gibson (he or they) | I'll have my music baroque, and my code david AT gibson.dropbear.id.au | minimalist, thank you, not the other way | around. http://www.ozlabs.org/~dgibson