From: Laurent Vivier <lvivier@redhat.com>
To: passt-dev@passt.top
Cc: Laurent Vivier <lvivier@redhat.com>
Subject: [PATCH 4/7] flow: Introduce flow_epoll_set() to centralize epoll operations
Date: Fri, 19 Dec 2025 17:45:15 +0100 [thread overview]
Message-ID: <20251219164518.930012-5-lvivier@redhat.com> (raw)
In-Reply-To: <20251219164518.930012-1-lvivier@redhat.com>
Currently, each flow type (TCP, TCP_SPLICE, PING, UDP) has its own
code to add or modify file descriptors in epoll. This leads to
duplicated boilerplate code across icmp.c, tcp.c, tcp_splice.c, and
udp_flow.c, each setting up epoll_ref unions and calling epoll_ctl()
with flow-type-specific details.
Introduce flow_epoll_set() in flow.c to handle epoll operations for
all flow types in a unified way. The function takes an explicit epoll
type parameter, allowing it to handle not only flow socket types but
also the TCP timer (EPOLL_TYPE_TCP_TIMER).
This will be needed to migrate queue pair from an epollfd to another.
Signed-off-by: Laurent Vivier <lvivier@redhat.com>
---
flow.c | 70 ++++++++++++++++++++++++++++++++++++++++++++++++++++
flow.h | 3 +++
icmp.c | 9 ++-----
tcp.c | 39 +++++++++--------------------
tcp_splice.c | 27 +++++++-------------
udp_flow.c | 12 +--------
6 files changed, 97 insertions(+), 63 deletions(-)
diff --git a/flow.c b/flow.c
index 4f53486586cd..9c98102e3616 100644
--- a/flow.c
+++ b/flow.c
@@ -20,6 +20,7 @@
#include "flow.h"
#include "flow_table.h"
#include "repair.h"
+#include "epoll_ctl.h"
const char *flow_state_str[] = {
[FLOW_STATE_FREE] = "FREE",
@@ -390,6 +391,75 @@ void flow_epollid_clear(struct flow_common *f)
f->epollid = EPOLLFD_ID_INVALID;
}
+/**
+ * flow_epoll_set() - Add or modify epoll registration for a flow socket
+ * @c: Execution context
+ * @type: epoll type
+ * @f: Flow to register socket for
+ * @events: epoll events to watch for
+ * @fd: File descriptor to register
+ * @sidei: Side index of the flow
+ *
+ * Return: 0 on success, -1 on error (from epoll_ctl())
+ */
+int flow_epoll_set(const struct ctx *c, enum epoll_type type,
+ const struct flow_common *f, uint32_t events, int fd,
+ unsigned int sidei)
+{
+ struct epoll_event ev;
+ union epoll_ref ref;
+ int epollfd;
+ int m;
+
+ ref.fd = fd;
+ ref.type = type;
+
+ switch (type) {
+ case EPOLL_TYPE_TCP_SPLICE:
+ case EPOLL_TYPE_TCP:
+ ref.flowside = flow_sidx(f, sidei);
+ if (flow_in_epoll(f)) {
+ m = EPOLL_CTL_MOD;
+ epollfd = flow_epollfd(f);
+ } else {
+ m = EPOLL_CTL_ADD;
+ epollfd = c->epollfd;
+ }
+ break;
+ case EPOLL_TYPE_TCP_TIMER: {
+ const struct tcp_tap_conn *conn = &((union flow *)f)->tcp;
+
+ ref.flow = flow_idx(f);
+ m = conn->timer == -1 ? EPOLL_CTL_ADD : EPOLL_CTL_MOD;
+ epollfd = flow_epollfd(f);
+ break;
+ }
+ case EPOLL_TYPE_PING:
+ ref.flowside = flow_sidx(f, sidei);
+ m = EPOLL_CTL_ADD;
+ epollfd = flow_epollfd(f);
+ break;
+ case EPOLL_TYPE_UDP: {
+ union {
+ flow_sidx_t sidx;
+ uint32_t data;
+ } fref = { .sidx = flow_sidx(f, sidei) };
+
+ ref.data = fref.data;
+ m = EPOLL_CTL_ADD;
+ epollfd = flow_epollfd(f);
+ break;
+ }
+ default:
+ ASSERT(0);
+ }
+
+ ev.events = events;
+ ev.data.u64 = ref.u64;
+
+ return epoll_ctl(epollfd, m, fd, &ev);
+}
+
/**
* flow_epollid_register() - Initialize the epoll id -> fd mapping
* @epollid: epoll id to associate to
diff --git a/flow.h b/flow.h
index b43b0b1dd7f2..388fab124238 100644
--- a/flow.h
+++ b/flow.h
@@ -265,6 +265,9 @@ bool flow_in_epoll(const struct flow_common *f);
int flow_epollfd(const struct flow_common *f);
void flow_epollid_set(struct flow_common *f, int epollid);
void flow_epollid_clear(struct flow_common *f);
+int flow_epoll_set(const struct ctx *c, enum epoll_type type,
+ const struct flow_common *f, uint32_t events, int fd,
+ unsigned int sidei);
void flow_epollid_register(int epollid, int epollfd);
void flow_defer_handler(const struct ctx *c, const struct timespec *now);
int flow_migrate_source_early(struct ctx *c, const struct migrate_stage *stage,
diff --git a/icmp.c b/icmp.c
index 9564c4963f7b..235759567060 100644
--- a/icmp.c
+++ b/icmp.c
@@ -177,7 +177,6 @@ static struct icmp_ping_flow *icmp_ping_new(const struct ctx *c,
union flow *flow = flow_alloc();
struct icmp_ping_flow *pingf;
const struct flowside *tgt;
- union epoll_ref ref;
if (!flow)
return NULL;
@@ -211,12 +210,8 @@ static struct icmp_ping_flow *icmp_ping_new(const struct ctx *c,
goto cancel;
flow_epollid_set(&pingf->f, EPOLLFD_ID_DEFAULT);
-
- ref.type = EPOLL_TYPE_PING;
- ref.flowside = FLOW_SIDX(flow, TGTSIDE);
- ref.fd = pingf->sock;
-
- if (epoll_add(flow_epollfd(&pingf->f), EPOLLIN, ref) < 0) {
+ if (flow_epoll_set(c, EPOLL_TYPE_PING, &pingf->f, EPOLLIN, pingf->sock,
+ TGTSIDE) < 0) {
close(pingf->sock);
goto cancel;
}
diff --git a/tcp.c b/tcp.c
index 6e6156098c12..2907af282551 100644
--- a/tcp.c
+++ b/tcp.c
@@ -530,14 +530,10 @@ static uint32_t tcp_conn_epoll_events(uint8_t events, uint8_t conn_flags)
*/
static int tcp_epoll_ctl(const struct ctx *c, struct tcp_tap_conn *conn)
{
- int m = flow_in_epoll(&conn->f) ? EPOLL_CTL_MOD : EPOLL_CTL_ADD;
- union epoll_ref ref = { .type = EPOLL_TYPE_TCP, .fd = conn->sock,
- .flowside = FLOW_SIDX(conn, !TAPSIDE(conn)), };
- struct epoll_event ev = { .data.u64 = ref.u64 };
- int epollfd = flow_in_epoll(&conn->f) ? flow_epollfd(&conn->f)
- : c->epollfd;
+ uint32_t events;
if (conn->events == CLOSED) {
+ int epollfd = flow_in_epoll(&conn->f) ? flow_epollfd(&conn->f) : c->epollfd;
if (flow_in_epoll(&conn->f))
epoll_del(epollfd, conn->sock);
if (conn->timer != -1)
@@ -545,22 +541,17 @@ static int tcp_epoll_ctl(const struct ctx *c, struct tcp_tap_conn *conn)
return 0;
}
- ev.events = tcp_conn_epoll_events(conn->events, conn->flags);
+ events = tcp_conn_epoll_events(conn->events, conn->flags);
- if (epoll_ctl(epollfd, m, conn->sock, &ev))
+ if (flow_epoll_set(c, EPOLL_TYPE_TCP, &conn->f, events, conn->sock,
+ !TAPSIDE(conn)))
return -errno;
flow_epollid_set(&conn->f, EPOLLFD_ID_DEFAULT);
if (conn->timer != -1) {
- union epoll_ref ref_t = { .type = EPOLL_TYPE_TCP_TIMER,
- .fd = conn->timer,
- .flow = FLOW_IDX(conn) };
- struct epoll_event ev_t = { .data.u64 = ref_t.u64,
- .events = EPOLLIN | EPOLLET };
-
- if (epoll_ctl(flow_epollfd(&conn->f), EPOLL_CTL_MOD,
- conn->timer, &ev_t))
+ if (flow_epoll_set(c, EPOLL_TYPE_TCP_TIMER, &conn->f,
+ EPOLLIN | EPOLLET, conn->timer, 0))
return -errno;
}
@@ -581,11 +572,6 @@ static void tcp_timer_ctl(const struct ctx *c, struct tcp_tap_conn *conn)
return;
if (conn->timer == -1) {
- union epoll_ref ref = { .type = EPOLL_TYPE_TCP_TIMER,
- .flow = FLOW_IDX(conn) };
- struct epoll_event ev = { .data.u64 = ref.u64,
- .events = EPOLLIN | EPOLLET };
- int epollfd = flow_epollfd(&conn->f);
int fd;
fd = timerfd_create(CLOCK_MONOTONIC, 0);
@@ -593,18 +579,17 @@ static void tcp_timer_ctl(const struct ctx *c, struct tcp_tap_conn *conn)
flow_dbg_perror(conn, "failed to get timer");
if (fd > -1)
close(fd);
- conn->timer = -1;
return;
}
- conn->timer = fd;
- ref.fd = conn->timer;
- if (epoll_ctl(epollfd, EPOLL_CTL_ADD, conn->timer, &ev)) {
+ if (flow_epoll_set(c, EPOLL_TYPE_TCP_TIMER, &conn->f,
+ EPOLLIN | EPOLLET, fd, 0)) {
flow_dbg_perror(conn, "failed to add timer");
- close(conn->timer);
- conn->timer = -1;
+ close(fd);
return;
}
+
+ conn->timer = fd;
}
if (conn->flags & ACK_TO_TAP_DUE) {
diff --git a/tcp_splice.c b/tcp_splice.c
index bf4ff466de07..7367f435367b 100644
--- a/tcp_splice.c
+++ b/tcp_splice.c
@@ -143,24 +143,15 @@ static uint32_t tcp_splice_conn_epoll_events(uint16_t events, unsigned sidei)
static int tcp_splice_epoll_ctl(const struct ctx *c,
struct tcp_splice_conn *conn)
{
- int epollfd = flow_in_epoll(&conn->f) ? flow_epollfd(&conn->f)
- : c->epollfd;
- int m = flow_in_epoll(&conn->f) ? EPOLL_CTL_MOD : EPOLL_CTL_ADD;
- const union epoll_ref ref[SIDES] = {
- { .type = EPOLL_TYPE_TCP_SPLICE, .fd = conn->s[0],
- .flowside = FLOW_SIDX(conn, 0) },
- { .type = EPOLL_TYPE_TCP_SPLICE, .fd = conn->s[1],
- .flowside = FLOW_SIDX(conn, 1) }
- };
- struct epoll_event ev[SIDES] = { { .data.u64 = ref[0].u64 },
- { .data.u64 = ref[1].u64 } };
-
- ev[0].events = tcp_splice_conn_epoll_events(conn->events, 0);
- ev[1].events = tcp_splice_conn_epoll_events(conn->events, 1);
-
-
- if (epoll_ctl(epollfd, m, conn->s[0], &ev[0]) ||
- epoll_ctl(epollfd, m, conn->s[1], &ev[1])) {
+ uint32_t events[2];
+
+ events[0] = tcp_splice_conn_epoll_events(conn->events, 0);
+ events[1] = tcp_splice_conn_epoll_events(conn->events, 1);
+
+ if (flow_epoll_set(c, EPOLL_TYPE_TCP_SPLICE, &conn->f, events[0],
+ conn->s[0], 0) ||
+ flow_epoll_set(c, EPOLL_TYPE_TCP_SPLICE, &conn->f, events[1],
+ conn->s[1], 1)) {
int ret = -errno;
flow_perror(conn, "ERROR on epoll_ctl()");
return ret;
diff --git a/udp_flow.c b/udp_flow.c
index 33f29f21e69e..42f3622d621c 100644
--- a/udp_flow.c
+++ b/udp_flow.c
@@ -74,11 +74,6 @@ static int udp_flow_sock(const struct ctx *c,
{
const struct flowside *side = &uflow->f.side[sidei];
uint8_t pif = uflow->f.pif[sidei];
- union {
- flow_sidx_t sidx;
- uint32_t data;
- } fref = { .sidx = FLOW_SIDX(uflow, sidei) };
- union epoll_ref ref;
int rc;
int s;
@@ -88,13 +83,8 @@ static int udp_flow_sock(const struct ctx *c,
return s;
}
- ref.type = EPOLL_TYPE_UDP;
- ref.data = fref.data;
- ref.fd = s;
-
flow_epollid_set(&uflow->f, EPOLLFD_ID_DEFAULT);
-
- rc = epoll_add(flow_epollfd(&uflow->f), EPOLLIN, ref);
+ rc = flow_epoll_set(c, EPOLL_TYPE_UDP, &uflow->f, EPOLLIN, s, sidei);
if (rc < 0) {
close(s);
return rc;
--
2.51.1
next prev parent reply other threads:[~2025-12-19 16:45 UTC|newest]
Thread overview: 8+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-12-19 16:45 [PATCH 0/7] " Laurent Vivier
2025-12-19 16:45 ` [PATCH 1/7] tcp: Update EPOLL_TYPE_TCP_TIMER fd Laurent Vivier
2025-12-19 16:45 ` [PATCH 2/7] udp_flow: Assign socket to flow inside udp_flow_sock() Laurent Vivier
2025-12-19 16:45 ` [PATCH 3/7] tcp_splice: Refactor tcp_splice_conn_epoll_events() to per-side computation Laurent Vivier
2025-12-19 16:45 ` Laurent Vivier [this message]
2025-12-19 16:45 ` [PATCH 5/7] flow: Use epoll_id_to_fd[EPOLLFD_ID_DEFAULT] in flow_epollid_set() Laurent Vivier
2025-12-19 16:45 ` [PATCH 6/7] flow: Compute epoll events inside flow_epoll_set() Laurent Vivier
2025-12-19 16:45 ` [PATCH 7/7] flow: Have flow_epoll_set() retrieve file descriptor from flow structure Laurent Vivier
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20251219164518.930012-5-lvivier@redhat.com \
--to=lvivier@redhat.com \
--cc=passt-dev@passt.top \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
Code repositories for project(s) associated with this public inbox
https://passt.top/passt
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for IMAP folder(s).