* [PATCH 1/7] tcp: Update EPOLL_TYPE_TCP_TIMER fd
2025-12-19 16:45 [PATCH 0/7] flow: Introduce flow_epoll_set() to centralize epoll operations Laurent Vivier
@ 2025-12-19 16:45 ` Laurent Vivier
2025-12-19 16:45 ` [PATCH 2/7] udp_flow: Assign socket to flow inside udp_flow_sock() Laurent Vivier
` (5 subsequent siblings)
6 siblings, 0 replies; 8+ messages in thread
From: Laurent Vivier @ 2025-12-19 16:45 UTC (permalink / raw)
To: passt-dev; +Cc: Laurent Vivier
For consistency with other epoll events, set the fd field to the file
descriptor actually added by epoll_ctl() (conn->timer), rather than
conn->sock.
This is a no-op change as ref.fd is not currently used in
tcp_timer_handler().
Signed-off-by: Laurent Vivier <lvivier@redhat.com>
---
tcp.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/tcp.c b/tcp.c
index b179e3991236..6e6156098c12 100644
--- a/tcp.c
+++ b/tcp.c
@@ -554,7 +554,7 @@ static int tcp_epoll_ctl(const struct ctx *c, struct tcp_tap_conn *conn)
if (conn->timer != -1) {
union epoll_ref ref_t = { .type = EPOLL_TYPE_TCP_TIMER,
- .fd = conn->sock,
+ .fd = conn->timer,
.flow = FLOW_IDX(conn) };
struct epoll_event ev_t = { .data.u64 = ref_t.u64,
.events = EPOLLIN | EPOLLET };
@@ -582,7 +582,6 @@ static void tcp_timer_ctl(const struct ctx *c, struct tcp_tap_conn *conn)
if (conn->timer == -1) {
union epoll_ref ref = { .type = EPOLL_TYPE_TCP_TIMER,
- .fd = conn->sock,
.flow = FLOW_IDX(conn) };
struct epoll_event ev = { .data.u64 = ref.u64,
.events = EPOLLIN | EPOLLET };
@@ -598,6 +597,7 @@ static void tcp_timer_ctl(const struct ctx *c, struct tcp_tap_conn *conn)
return;
}
conn->timer = fd;
+ ref.fd = conn->timer;
if (epoll_ctl(epollfd, EPOLL_CTL_ADD, conn->timer, &ev)) {
flow_dbg_perror(conn, "failed to add timer");
--
2.51.1
^ permalink raw reply [flat|nested] 8+ messages in thread* [PATCH 2/7] udp_flow: Assign socket to flow inside udp_flow_sock()
2025-12-19 16:45 [PATCH 0/7] flow: Introduce flow_epoll_set() to centralize epoll operations Laurent Vivier
2025-12-19 16:45 ` [PATCH 1/7] tcp: Update EPOLL_TYPE_TCP_TIMER fd Laurent Vivier
@ 2025-12-19 16:45 ` Laurent Vivier
2025-12-19 16:45 ` [PATCH 3/7] tcp_splice: Refactor tcp_splice_conn_epoll_events() to per-side computation Laurent Vivier
` (4 subsequent siblings)
6 siblings, 0 replies; 8+ messages in thread
From: Laurent Vivier @ 2025-12-19 16:45 UTC (permalink / raw)
To: passt-dev; +Cc: Laurent Vivier
Move the assignment of uflow->s[sidei] from the caller (udp_flow_new())
into udp_flow_sock() itself, placing it after the successful connect().
This is a pure refactoring with no functional change. The socket fd is
now assigned within udp_flow_sock() where the socket is created, rather
than requiring the caller to capture the return value. On error paths,
uflow->s[sidei] remains at its initialized value of -1 rather than being
set to the negative error code, which is semantically cleaner (though
functionally equivalent given the >= 0 check in udp_flow_close()).
Signed-off-by: Laurent Vivier <lvivier@redhat.com>
---
udp_flow.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/udp_flow.c b/udp_flow.c
index 8907f2f72741..33f29f21e69e 100644
--- a/udp_flow.c
+++ b/udp_flow.c
@@ -109,6 +109,7 @@ static int udp_flow_sock(const struct ctx *c,
flow_dbg_perror(uflow, "Couldn't connect flow socket");
return rc;
}
+ uflow->s[sidei] = s;
/* It's possible, if unlikely, that we could receive some packets in
* between the bind() and connect() which may or may not be for this
@@ -163,7 +164,7 @@ static flow_sidx_t udp_flow_new(const struct ctx *c, union flow *flow,
flow_foreach_sidei(sidei) {
if (pif_is_socket(uflow->f.pif[sidei]))
- if ((uflow->s[sidei] = udp_flow_sock(c, uflow, sidei)) < 0)
+ if (udp_flow_sock(c, uflow, sidei) < 0)
goto cancel;
}
--
2.51.1
^ permalink raw reply [flat|nested] 8+ messages in thread* [PATCH 3/7] tcp_splice: Refactor tcp_splice_conn_epoll_events() to per-side computation
2025-12-19 16:45 [PATCH 0/7] flow: Introduce flow_epoll_set() to centralize epoll operations Laurent Vivier
2025-12-19 16:45 ` [PATCH 1/7] tcp: Update EPOLL_TYPE_TCP_TIMER fd Laurent Vivier
2025-12-19 16:45 ` [PATCH 2/7] udp_flow: Assign socket to flow inside udp_flow_sock() Laurent Vivier
@ 2025-12-19 16:45 ` Laurent Vivier
2025-12-19 16:45 ` [PATCH 4/7] flow: Introduce flow_epoll_set() to centralize epoll operations Laurent Vivier
` (3 subsequent siblings)
6 siblings, 0 replies; 8+ messages in thread
From: Laurent Vivier @ 2025-12-19 16:45 UTC (permalink / raw)
To: passt-dev; +Cc: Laurent Vivier
The function tcp_splice_conn_epoll_events() currently takes an array of
struct epoll_event and fills in the .events field for both sides using
flow_foreach_sidei() loops.
This works, but the function is doing two conceptually separate things
at once: computing events for side 0 and computing events for side 1.
The OUT_WAIT handling is particularly subtle, as it has cross-side
effects: when OUT_WAIT(sidei) is set, we add EPOLLOUT to ev[sidei] but
also remove EPOLLIN from ev[!sidei].
Refactor to make the function compute events for a single side at a
time, taking sidei as a parameter and returning uint32_t. This makes
the logic more focused and easier to follow. The cross-side effects of
OUT_WAIT are preserved by checking both OUT_WAIT(sidei) and
OUT_WAIT(!sidei) within each call.
The caller tcp_splice_epoll_ctl() now invokes the function twice, once
for each side, making the two-sided nature of the operation explicit.
No functional change.
Signed-off-by: Laurent Vivier <lvivier@redhat.com>
---
tcp_splice.c | 33 ++++++++++++++-------------------
1 file changed, 14 insertions(+), 19 deletions(-)
diff --git a/tcp_splice.c b/tcp_splice.c
index 440522449c13..bf4ff466de07 100644
--- a/tcp_splice.c
+++ b/tcp_splice.c
@@ -114,29 +114,23 @@ static struct tcp_splice_conn *conn_at_sidx(flow_sidx_t sidx)
* @events: Connection event flags
* @ev: Events to fill in, 0 is accepted socket, 1 is connecting socket
*/
-static void tcp_splice_conn_epoll_events(uint16_t events,
- struct epoll_event ev[])
+static uint32_t tcp_splice_conn_epoll_events(uint16_t events, unsigned sidei)
{
- unsigned sidei;
-
- flow_foreach_sidei(sidei)
- ev[sidei].events = 0;
+ uint32_t e = 0;
if (events & SPLICE_ESTABLISHED) {
- flow_foreach_sidei(sidei) {
- if (!(events & FIN_SENT(!sidei)))
- ev[sidei].events = EPOLLIN | EPOLLRDHUP;
- }
- } else if (events & SPLICE_CONNECT) {
- ev[1].events = EPOLLOUT;
+ if (!(events & FIN_SENT(!sidei)))
+ e = EPOLLIN | EPOLLRDHUP;
+ } else if (sidei == 1 && events & SPLICE_CONNECT) {
+ e = EPOLLOUT;
}
- flow_foreach_sidei(sidei) {
- if (events & OUT_WAIT(sidei)) {
- ev[sidei].events |= EPOLLOUT;
- ev[!sidei].events &= ~EPOLLIN;
- }
- }
+ if (events & OUT_WAIT(sidei))
+ e |= EPOLLOUT;
+ if (events & OUT_WAIT(!sidei))
+ e &= ~EPOLLIN;
+
+ return e;
}
/**
@@ -161,7 +155,8 @@ static int tcp_splice_epoll_ctl(const struct ctx *c,
struct epoll_event ev[SIDES] = { { .data.u64 = ref[0].u64 },
{ .data.u64 = ref[1].u64 } };
- tcp_splice_conn_epoll_events(conn->events, ev);
+ ev[0].events = tcp_splice_conn_epoll_events(conn->events, 0);
+ ev[1].events = tcp_splice_conn_epoll_events(conn->events, 1);
if (epoll_ctl(epollfd, m, conn->s[0], &ev[0]) ||
--
2.51.1
^ permalink raw reply [flat|nested] 8+ messages in thread* [PATCH 4/7] flow: Introduce flow_epoll_set() to centralize epoll operations
2025-12-19 16:45 [PATCH 0/7] flow: Introduce flow_epoll_set() to centralize epoll operations Laurent Vivier
` (2 preceding siblings ...)
2025-12-19 16:45 ` [PATCH 3/7] tcp_splice: Refactor tcp_splice_conn_epoll_events() to per-side computation Laurent Vivier
@ 2025-12-19 16:45 ` Laurent Vivier
2025-12-19 16:45 ` [PATCH 5/7] flow: Use epoll_id_to_fd[EPOLLFD_ID_DEFAULT] in flow_epollid_set() Laurent Vivier
` (2 subsequent siblings)
6 siblings, 0 replies; 8+ messages in thread
From: Laurent Vivier @ 2025-12-19 16:45 UTC (permalink / raw)
To: passt-dev; +Cc: Laurent Vivier
Currently, each flow type (TCP, TCP_SPLICE, PING, UDP) has its own
code to add or modify file descriptors in epoll. This leads to
duplicated boilerplate code across icmp.c, tcp.c, tcp_splice.c, and
udp_flow.c, each setting up epoll_ref unions and calling epoll_ctl()
with flow-type-specific details.
Introduce flow_epoll_set() in flow.c to handle epoll operations for
all flow types in a unified way. The function takes an explicit epoll
type parameter, allowing it to handle not only flow socket types but
also the TCP timer (EPOLL_TYPE_TCP_TIMER).
This will be needed to migrate queue pair from an epollfd to another.
Signed-off-by: Laurent Vivier <lvivier@redhat.com>
---
flow.c | 70 ++++++++++++++++++++++++++++++++++++++++++++++++++++
flow.h | 3 +++
icmp.c | 9 ++-----
tcp.c | 39 +++++++++--------------------
tcp_splice.c | 27 +++++++-------------
udp_flow.c | 12 +--------
6 files changed, 97 insertions(+), 63 deletions(-)
diff --git a/flow.c b/flow.c
index 4f53486586cd..9c98102e3616 100644
--- a/flow.c
+++ b/flow.c
@@ -20,6 +20,7 @@
#include "flow.h"
#include "flow_table.h"
#include "repair.h"
+#include "epoll_ctl.h"
const char *flow_state_str[] = {
[FLOW_STATE_FREE] = "FREE",
@@ -390,6 +391,75 @@ void flow_epollid_clear(struct flow_common *f)
f->epollid = EPOLLFD_ID_INVALID;
}
+/**
+ * flow_epoll_set() - Add or modify epoll registration for a flow socket
+ * @c: Execution context
+ * @type: epoll type
+ * @f: Flow to register socket for
+ * @events: epoll events to watch for
+ * @fd: File descriptor to register
+ * @sidei: Side index of the flow
+ *
+ * Return: 0 on success, -1 on error (from epoll_ctl())
+ */
+int flow_epoll_set(const struct ctx *c, enum epoll_type type,
+ const struct flow_common *f, uint32_t events, int fd,
+ unsigned int sidei)
+{
+ struct epoll_event ev;
+ union epoll_ref ref;
+ int epollfd;
+ int m;
+
+ ref.fd = fd;
+ ref.type = type;
+
+ switch (type) {
+ case EPOLL_TYPE_TCP_SPLICE:
+ case EPOLL_TYPE_TCP:
+ ref.flowside = flow_sidx(f, sidei);
+ if (flow_in_epoll(f)) {
+ m = EPOLL_CTL_MOD;
+ epollfd = flow_epollfd(f);
+ } else {
+ m = EPOLL_CTL_ADD;
+ epollfd = c->epollfd;
+ }
+ break;
+ case EPOLL_TYPE_TCP_TIMER: {
+ const struct tcp_tap_conn *conn = &((union flow *)f)->tcp;
+
+ ref.flow = flow_idx(f);
+ m = conn->timer == -1 ? EPOLL_CTL_ADD : EPOLL_CTL_MOD;
+ epollfd = flow_epollfd(f);
+ break;
+ }
+ case EPOLL_TYPE_PING:
+ ref.flowside = flow_sidx(f, sidei);
+ m = EPOLL_CTL_ADD;
+ epollfd = flow_epollfd(f);
+ break;
+ case EPOLL_TYPE_UDP: {
+ union {
+ flow_sidx_t sidx;
+ uint32_t data;
+ } fref = { .sidx = flow_sidx(f, sidei) };
+
+ ref.data = fref.data;
+ m = EPOLL_CTL_ADD;
+ epollfd = flow_epollfd(f);
+ break;
+ }
+ default:
+ ASSERT(0);
+ }
+
+ ev.events = events;
+ ev.data.u64 = ref.u64;
+
+ return epoll_ctl(epollfd, m, fd, &ev);
+}
+
/**
* flow_epollid_register() - Initialize the epoll id -> fd mapping
* @epollid: epoll id to associate to
diff --git a/flow.h b/flow.h
index b43b0b1dd7f2..388fab124238 100644
--- a/flow.h
+++ b/flow.h
@@ -265,6 +265,9 @@ bool flow_in_epoll(const struct flow_common *f);
int flow_epollfd(const struct flow_common *f);
void flow_epollid_set(struct flow_common *f, int epollid);
void flow_epollid_clear(struct flow_common *f);
+int flow_epoll_set(const struct ctx *c, enum epoll_type type,
+ const struct flow_common *f, uint32_t events, int fd,
+ unsigned int sidei);
void flow_epollid_register(int epollid, int epollfd);
void flow_defer_handler(const struct ctx *c, const struct timespec *now);
int flow_migrate_source_early(struct ctx *c, const struct migrate_stage *stage,
diff --git a/icmp.c b/icmp.c
index 9564c4963f7b..235759567060 100644
--- a/icmp.c
+++ b/icmp.c
@@ -177,7 +177,6 @@ static struct icmp_ping_flow *icmp_ping_new(const struct ctx *c,
union flow *flow = flow_alloc();
struct icmp_ping_flow *pingf;
const struct flowside *tgt;
- union epoll_ref ref;
if (!flow)
return NULL;
@@ -211,12 +210,8 @@ static struct icmp_ping_flow *icmp_ping_new(const struct ctx *c,
goto cancel;
flow_epollid_set(&pingf->f, EPOLLFD_ID_DEFAULT);
-
- ref.type = EPOLL_TYPE_PING;
- ref.flowside = FLOW_SIDX(flow, TGTSIDE);
- ref.fd = pingf->sock;
-
- if (epoll_add(flow_epollfd(&pingf->f), EPOLLIN, ref) < 0) {
+ if (flow_epoll_set(c, EPOLL_TYPE_PING, &pingf->f, EPOLLIN, pingf->sock,
+ TGTSIDE) < 0) {
close(pingf->sock);
goto cancel;
}
diff --git a/tcp.c b/tcp.c
index 6e6156098c12..2907af282551 100644
--- a/tcp.c
+++ b/tcp.c
@@ -530,14 +530,10 @@ static uint32_t tcp_conn_epoll_events(uint8_t events, uint8_t conn_flags)
*/
static int tcp_epoll_ctl(const struct ctx *c, struct tcp_tap_conn *conn)
{
- int m = flow_in_epoll(&conn->f) ? EPOLL_CTL_MOD : EPOLL_CTL_ADD;
- union epoll_ref ref = { .type = EPOLL_TYPE_TCP, .fd = conn->sock,
- .flowside = FLOW_SIDX(conn, !TAPSIDE(conn)), };
- struct epoll_event ev = { .data.u64 = ref.u64 };
- int epollfd = flow_in_epoll(&conn->f) ? flow_epollfd(&conn->f)
- : c->epollfd;
+ uint32_t events;
if (conn->events == CLOSED) {
+ int epollfd = flow_in_epoll(&conn->f) ? flow_epollfd(&conn->f) : c->epollfd;
if (flow_in_epoll(&conn->f))
epoll_del(epollfd, conn->sock);
if (conn->timer != -1)
@@ -545,22 +541,17 @@ static int tcp_epoll_ctl(const struct ctx *c, struct tcp_tap_conn *conn)
return 0;
}
- ev.events = tcp_conn_epoll_events(conn->events, conn->flags);
+ events = tcp_conn_epoll_events(conn->events, conn->flags);
- if (epoll_ctl(epollfd, m, conn->sock, &ev))
+ if (flow_epoll_set(c, EPOLL_TYPE_TCP, &conn->f, events, conn->sock,
+ !TAPSIDE(conn)))
return -errno;
flow_epollid_set(&conn->f, EPOLLFD_ID_DEFAULT);
if (conn->timer != -1) {
- union epoll_ref ref_t = { .type = EPOLL_TYPE_TCP_TIMER,
- .fd = conn->timer,
- .flow = FLOW_IDX(conn) };
- struct epoll_event ev_t = { .data.u64 = ref_t.u64,
- .events = EPOLLIN | EPOLLET };
-
- if (epoll_ctl(flow_epollfd(&conn->f), EPOLL_CTL_MOD,
- conn->timer, &ev_t))
+ if (flow_epoll_set(c, EPOLL_TYPE_TCP_TIMER, &conn->f,
+ EPOLLIN | EPOLLET, conn->timer, 0))
return -errno;
}
@@ -581,11 +572,6 @@ static void tcp_timer_ctl(const struct ctx *c, struct tcp_tap_conn *conn)
return;
if (conn->timer == -1) {
- union epoll_ref ref = { .type = EPOLL_TYPE_TCP_TIMER,
- .flow = FLOW_IDX(conn) };
- struct epoll_event ev = { .data.u64 = ref.u64,
- .events = EPOLLIN | EPOLLET };
- int epollfd = flow_epollfd(&conn->f);
int fd;
fd = timerfd_create(CLOCK_MONOTONIC, 0);
@@ -593,18 +579,17 @@ static void tcp_timer_ctl(const struct ctx *c, struct tcp_tap_conn *conn)
flow_dbg_perror(conn, "failed to get timer");
if (fd > -1)
close(fd);
- conn->timer = -1;
return;
}
- conn->timer = fd;
- ref.fd = conn->timer;
- if (epoll_ctl(epollfd, EPOLL_CTL_ADD, conn->timer, &ev)) {
+ if (flow_epoll_set(c, EPOLL_TYPE_TCP_TIMER, &conn->f,
+ EPOLLIN | EPOLLET, fd, 0)) {
flow_dbg_perror(conn, "failed to add timer");
- close(conn->timer);
- conn->timer = -1;
+ close(fd);
return;
}
+
+ conn->timer = fd;
}
if (conn->flags & ACK_TO_TAP_DUE) {
diff --git a/tcp_splice.c b/tcp_splice.c
index bf4ff466de07..7367f435367b 100644
--- a/tcp_splice.c
+++ b/tcp_splice.c
@@ -143,24 +143,15 @@ static uint32_t tcp_splice_conn_epoll_events(uint16_t events, unsigned sidei)
static int tcp_splice_epoll_ctl(const struct ctx *c,
struct tcp_splice_conn *conn)
{
- int epollfd = flow_in_epoll(&conn->f) ? flow_epollfd(&conn->f)
- : c->epollfd;
- int m = flow_in_epoll(&conn->f) ? EPOLL_CTL_MOD : EPOLL_CTL_ADD;
- const union epoll_ref ref[SIDES] = {
- { .type = EPOLL_TYPE_TCP_SPLICE, .fd = conn->s[0],
- .flowside = FLOW_SIDX(conn, 0) },
- { .type = EPOLL_TYPE_TCP_SPLICE, .fd = conn->s[1],
- .flowside = FLOW_SIDX(conn, 1) }
- };
- struct epoll_event ev[SIDES] = { { .data.u64 = ref[0].u64 },
- { .data.u64 = ref[1].u64 } };
-
- ev[0].events = tcp_splice_conn_epoll_events(conn->events, 0);
- ev[1].events = tcp_splice_conn_epoll_events(conn->events, 1);
-
-
- if (epoll_ctl(epollfd, m, conn->s[0], &ev[0]) ||
- epoll_ctl(epollfd, m, conn->s[1], &ev[1])) {
+ uint32_t events[2];
+
+ events[0] = tcp_splice_conn_epoll_events(conn->events, 0);
+ events[1] = tcp_splice_conn_epoll_events(conn->events, 1);
+
+ if (flow_epoll_set(c, EPOLL_TYPE_TCP_SPLICE, &conn->f, events[0],
+ conn->s[0], 0) ||
+ flow_epoll_set(c, EPOLL_TYPE_TCP_SPLICE, &conn->f, events[1],
+ conn->s[1], 1)) {
int ret = -errno;
flow_perror(conn, "ERROR on epoll_ctl()");
return ret;
diff --git a/udp_flow.c b/udp_flow.c
index 33f29f21e69e..42f3622d621c 100644
--- a/udp_flow.c
+++ b/udp_flow.c
@@ -74,11 +74,6 @@ static int udp_flow_sock(const struct ctx *c,
{
const struct flowside *side = &uflow->f.side[sidei];
uint8_t pif = uflow->f.pif[sidei];
- union {
- flow_sidx_t sidx;
- uint32_t data;
- } fref = { .sidx = FLOW_SIDX(uflow, sidei) };
- union epoll_ref ref;
int rc;
int s;
@@ -88,13 +83,8 @@ static int udp_flow_sock(const struct ctx *c,
return s;
}
- ref.type = EPOLL_TYPE_UDP;
- ref.data = fref.data;
- ref.fd = s;
-
flow_epollid_set(&uflow->f, EPOLLFD_ID_DEFAULT);
-
- rc = epoll_add(flow_epollfd(&uflow->f), EPOLLIN, ref);
+ rc = flow_epoll_set(c, EPOLL_TYPE_UDP, &uflow->f, EPOLLIN, s, sidei);
if (rc < 0) {
close(s);
return rc;
--
2.51.1
^ permalink raw reply [flat|nested] 8+ messages in thread* [PATCH 5/7] flow: Use epoll_id_to_fd[EPOLLFD_ID_DEFAULT] in flow_epollid_set()
2025-12-19 16:45 [PATCH 0/7] flow: Introduce flow_epoll_set() to centralize epoll operations Laurent Vivier
` (3 preceding siblings ...)
2025-12-19 16:45 ` [PATCH 4/7] flow: Introduce flow_epoll_set() to centralize epoll operations Laurent Vivier
@ 2025-12-19 16:45 ` Laurent Vivier
2025-12-19 16:45 ` [PATCH 6/7] flow: Compute epoll events inside flow_epoll_set() Laurent Vivier
2025-12-19 16:45 ` [PATCH 7/7] flow: Have flow_epoll_set() retrieve file descriptor from flow structure Laurent Vivier
6 siblings, 0 replies; 8+ messages in thread
From: Laurent Vivier @ 2025-12-19 16:45 UTC (permalink / raw)
To: passt-dev; +Cc: Laurent Vivier
In tcp_epoll_ctl() and tcp_splice_epoll_ctl(), flow_epollid_set() is
called after flow_epoll_set() to set the epollid to EPOLLFD_ID_DEFAULT.
When updating an existing flow (EPOLL_CTL_MOD), flow_epoll_set() already
retrieves the correct epoll fd via flow_epollfd(). For new additions
(EPOLL_CTL_ADD), we can use epoll_id_to_fd[EPOLLFD_ID_DEFAULT] directly
instead of c->epollfd, since that's the epollid that will be set.
This removes the need for the ctx parameter from flow_epoll_set() and
simplifies the API. The change cascades through all callers: tcp.c,
icmp.c, udp_flow.c, and tcp_splice.c.
In tcp_splice.c, removing ctx from flow_epoll_set() calls allows us to
also remove the ctx parameter from tcp_splice_epoll_ctl() and
conn_event_do(), further simplifying the splice connection handling.
Signed-off-by: Laurent Vivier <lvivier@redhat.com>
---
flow.c | 8 +++-----
flow.h | 5 ++---
icmp.c | 2 +-
tcp.c | 6 +++---
tcp_splice.c | 32 ++++++++++++++------------------
udp_flow.c | 2 +-
6 files changed, 24 insertions(+), 31 deletions(-)
diff --git a/flow.c b/flow.c
index 9c98102e3616..a3f75015a078 100644
--- a/flow.c
+++ b/flow.c
@@ -393,7 +393,6 @@ void flow_epollid_clear(struct flow_common *f)
/**
* flow_epoll_set() - Add or modify epoll registration for a flow socket
- * @c: Execution context
* @type: epoll type
* @f: Flow to register socket for
* @events: epoll events to watch for
@@ -402,9 +401,8 @@ void flow_epollid_clear(struct flow_common *f)
*
* Return: 0 on success, -1 on error (from epoll_ctl())
*/
-int flow_epoll_set(const struct ctx *c, enum epoll_type type,
- const struct flow_common *f, uint32_t events, int fd,
- unsigned int sidei)
+int flow_epoll_set(enum epoll_type type, const struct flow_common *f,
+ uint32_t events, int fd, unsigned int sidei)
{
struct epoll_event ev;
union epoll_ref ref;
@@ -423,7 +421,7 @@ int flow_epoll_set(const struct ctx *c, enum epoll_type type,
epollfd = flow_epollfd(f);
} else {
m = EPOLL_CTL_ADD;
- epollfd = c->epollfd;
+ epollfd = epoll_id_to_fd[EPOLLFD_ID_DEFAULT];
}
break;
case EPOLL_TYPE_TCP_TIMER: {
diff --git a/flow.h b/flow.h
index 388fab124238..9740ede8963e 100644
--- a/flow.h
+++ b/flow.h
@@ -265,9 +265,8 @@ bool flow_in_epoll(const struct flow_common *f);
int flow_epollfd(const struct flow_common *f);
void flow_epollid_set(struct flow_common *f, int epollid);
void flow_epollid_clear(struct flow_common *f);
-int flow_epoll_set(const struct ctx *c, enum epoll_type type,
- const struct flow_common *f, uint32_t events, int fd,
- unsigned int sidei);
+int flow_epoll_set(enum epoll_type type, const struct flow_common *f,
+ uint32_t events, int fd, unsigned int sidei);
void flow_epollid_register(int epollid, int epollfd);
void flow_defer_handler(const struct ctx *c, const struct timespec *now);
int flow_migrate_source_early(struct ctx *c, const struct migrate_stage *stage,
diff --git a/icmp.c b/icmp.c
index 235759567060..f0b6fcff1776 100644
--- a/icmp.c
+++ b/icmp.c
@@ -210,7 +210,7 @@ static struct icmp_ping_flow *icmp_ping_new(const struct ctx *c,
goto cancel;
flow_epollid_set(&pingf->f, EPOLLFD_ID_DEFAULT);
- if (flow_epoll_set(c, EPOLL_TYPE_PING, &pingf->f, EPOLLIN, pingf->sock,
+ if (flow_epoll_set(EPOLL_TYPE_PING, &pingf->f, EPOLLIN, pingf->sock,
TGTSIDE) < 0) {
close(pingf->sock);
goto cancel;
diff --git a/tcp.c b/tcp.c
index 2907af282551..ec4f6d2ecc04 100644
--- a/tcp.c
+++ b/tcp.c
@@ -543,14 +543,14 @@ static int tcp_epoll_ctl(const struct ctx *c, struct tcp_tap_conn *conn)
events = tcp_conn_epoll_events(conn->events, conn->flags);
- if (flow_epoll_set(c, EPOLL_TYPE_TCP, &conn->f, events, conn->sock,
+ if (flow_epoll_set(EPOLL_TYPE_TCP, &conn->f, events, conn->sock,
!TAPSIDE(conn)))
return -errno;
flow_epollid_set(&conn->f, EPOLLFD_ID_DEFAULT);
if (conn->timer != -1) {
- if (flow_epoll_set(c, EPOLL_TYPE_TCP_TIMER, &conn->f,
+ if (flow_epoll_set(EPOLL_TYPE_TCP_TIMER, &conn->f,
EPOLLIN | EPOLLET, conn->timer, 0))
return -errno;
}
@@ -582,7 +582,7 @@ static void tcp_timer_ctl(const struct ctx *c, struct tcp_tap_conn *conn)
return;
}
- if (flow_epoll_set(c, EPOLL_TYPE_TCP_TIMER, &conn->f,
+ if (flow_epoll_set(EPOLL_TYPE_TCP_TIMER, &conn->f,
EPOLLIN | EPOLLET, fd, 0)) {
flow_dbg_perror(conn, "failed to add timer");
close(fd);
diff --git a/tcp_splice.c b/tcp_splice.c
index 7367f435367b..ccf627f4427d 100644
--- a/tcp_splice.c
+++ b/tcp_splice.c
@@ -135,22 +135,20 @@ static uint32_t tcp_splice_conn_epoll_events(uint16_t events, unsigned sidei)
/**
* tcp_splice_epoll_ctl() - Add/modify/delete epoll state from connection events
- * @c: Execution context
* @conn: Connection pointer
*
* Return: 0 on success, negative error code on failure (not on deletion)
*/
-static int tcp_splice_epoll_ctl(const struct ctx *c,
- struct tcp_splice_conn *conn)
+static int tcp_splice_epoll_ctl(struct tcp_splice_conn *conn)
{
uint32_t events[2];
events[0] = tcp_splice_conn_epoll_events(conn->events, 0);
events[1] = tcp_splice_conn_epoll_events(conn->events, 1);
- if (flow_epoll_set(c, EPOLL_TYPE_TCP_SPLICE, &conn->f, events[0],
+ if (flow_epoll_set(EPOLL_TYPE_TCP_SPLICE, &conn->f, events[0],
conn->s[0], 0) ||
- flow_epoll_set(c, EPOLL_TYPE_TCP_SPLICE, &conn->f, events[1],
+ flow_epoll_set(EPOLL_TYPE_TCP_SPLICE, &conn->f, events[1],
conn->s[1], 1)) {
int ret = -errno;
flow_perror(conn, "ERROR on epoll_ctl()");
@@ -204,12 +202,10 @@ static void conn_flag_do(struct tcp_splice_conn *conn,
/**
* conn_event_do() - Set and log connection events, update epoll state
- * @c: Execution context
* @conn: Connection pointer
* @event: Connection event
*/
-static void conn_event_do(const struct ctx *c, struct tcp_splice_conn *conn,
- unsigned long event)
+static void conn_event_do(struct tcp_splice_conn *conn, unsigned long event)
{
if (event & (event - 1)) {
int flag_index = fls(~event);
@@ -231,14 +227,14 @@ static void conn_event_do(const struct ctx *c, struct tcp_splice_conn *conn,
flow_dbg(conn, "%s", tcp_splice_event_str[flag_index]);
}
- if (tcp_splice_epoll_ctl(c, conn))
+ if (tcp_splice_epoll_ctl(conn))
conn_flag(c, conn, CLOSING);
}
-#define conn_event(c, conn, event) \
+#define conn_event(conn, event) \
do { \
flow_trace(conn, "event at %s:%i",__func__, __LINE__); \
- conn_event_do(c, conn, event); \
+ conn_event_do(conn, event); \
} while (0)
@@ -320,7 +316,7 @@ static int tcp_splice_connect_finish(const struct ctx *c,
}
if (!(conn->events & SPLICE_ESTABLISHED))
- conn_event(c, conn, SPLICE_ESTABLISHED);
+ conn_event(conn, SPLICE_ESTABLISHED);
return 0;
}
@@ -367,7 +363,7 @@ static int tcp_splice_connect(const struct ctx *c, struct tcp_splice_conn *conn)
pif_sockaddr(c, &sa, tgtpif, &tgt->eaddr, tgt->eport);
- conn_event(c, conn, SPLICE_CONNECT);
+ conn_event(conn, SPLICE_CONNECT);
if (connect(conn->s[1], &sa.sa, socklen_inany(&sa))) {
if (errno != EINPROGRESS) {
@@ -376,7 +372,7 @@ static int tcp_splice_connect(const struct ctx *c, struct tcp_splice_conn *conn)
return -errno;
}
} else {
- conn_event(c, conn, SPLICE_ESTABLISHED);
+ conn_event(conn, SPLICE_ESTABLISHED);
return tcp_splice_connect_finish(c, conn);
}
@@ -485,14 +481,14 @@ void tcp_splice_sock_handler(struct ctx *c, union epoll_ref ref,
if (events & EPOLLOUT) {
fromsidei = !evsidei;
- conn_event(c, conn, ~OUT_WAIT(evsidei));
+ conn_event(conn, ~OUT_WAIT(evsidei));
} else {
fromsidei = evsidei;
}
if (events & EPOLLRDHUP)
/* For side 0 this is fake, but implied */
- conn_event(c, conn, FIN_RCVD(evsidei));
+ conn_event(conn, FIN_RCVD(evsidei));
swap:
eof = 0;
@@ -574,7 +570,7 @@ retry:
if (conn->read[fromsidei] == conn->written[fromsidei])
break;
- conn_event(c, conn, OUT_WAIT(!fromsidei));
+ conn_event(conn, OUT_WAIT(!fromsidei));
break;
}
@@ -596,7 +592,7 @@ retry:
if ((conn->events & FIN_RCVD(sidei)) &&
!(conn->events & FIN_SENT(!sidei))) {
shutdown(conn->s[!sidei], SHUT_WR);
- conn_event(c, conn, FIN_SENT(!sidei));
+ conn_event(conn, FIN_SENT(!sidei));
}
}
}
diff --git a/udp_flow.c b/udp_flow.c
index 42f3622d621c..07ff589670c1 100644
--- a/udp_flow.c
+++ b/udp_flow.c
@@ -84,7 +84,7 @@ static int udp_flow_sock(const struct ctx *c,
}
flow_epollid_set(&uflow->f, EPOLLFD_ID_DEFAULT);
- rc = flow_epoll_set(c, EPOLL_TYPE_UDP, &uflow->f, EPOLLIN, s, sidei);
+ rc = flow_epoll_set(EPOLL_TYPE_UDP, &uflow->f, EPOLLIN, s, sidei);
if (rc < 0) {
close(s);
return rc;
--
2.51.1
^ permalink raw reply [flat|nested] 8+ messages in thread* [PATCH 6/7] flow: Compute epoll events inside flow_epoll_set()
2025-12-19 16:45 [PATCH 0/7] flow: Introduce flow_epoll_set() to centralize epoll operations Laurent Vivier
` (4 preceding siblings ...)
2025-12-19 16:45 ` [PATCH 5/7] flow: Use epoll_id_to_fd[EPOLLFD_ID_DEFAULT] in flow_epollid_set() Laurent Vivier
@ 2025-12-19 16:45 ` Laurent Vivier
2025-12-19 16:45 ` [PATCH 7/7] flow: Have flow_epoll_set() retrieve file descriptor from flow structure Laurent Vivier
6 siblings, 0 replies; 8+ messages in thread
From: Laurent Vivier @ 2025-12-19 16:45 UTC (permalink / raw)
To: passt-dev; +Cc: Laurent Vivier
Rather than having each caller compute the appropriate epoll events and
pass them to flow_epoll_set(), move the event computation into
flow_epoll_set() itself. The function already switches on epoll_type to
determine other parameters, so computing events there is a natural fit.
Move tcp_conn_epoll_events() from tcp.c and tcp_splice_conn_epoll_events()
from tcp_splice.c into flow.c where they can be called internally. For
simpler cases (PING, UDP, TCP_TIMER), the events are constant values that
are now set directly in the switch cases.
This centralizes all epoll event logic in one place, making it easier to
understand and maintain the relationship between flow types and their
epoll configurations.
Signed-off-by: Laurent Vivier <lvivier@redhat.com>
---
flow.c | 86 +++++++++++++++++++++++++++++++++++++++++++++++++---
flow.h | 2 +-
icmp.c | 2 +-
tcp.c | 43 ++------------------------
tcp_splice.c | 35 ++-------------------
udp_flow.c | 2 +-
6 files changed, 90 insertions(+), 80 deletions(-)
diff --git a/flow.c b/flow.c
index a3f75015a078..6b9ccca6ef50 100644
--- a/flow.c
+++ b/flow.c
@@ -391,21 +391,77 @@ void flow_epollid_clear(struct flow_common *f)
f->epollid = EPOLLFD_ID_INVALID;
}
+/**
+ * tcp_splice_conn_epoll_events() - epoll events masks for given state
+ * @events: Connection event flags
+ * @ev: Events to fill in, 0 is accepted socket, 1 is connecting socket
+ */
+static uint32_t tcp_splice_conn_epoll_events(uint16_t events, unsigned sidei)
+{
+ uint32_t e = 0;
+
+ if (events & SPLICE_ESTABLISHED) {
+ if (!(events & FIN_SENT(!sidei)))
+ e = EPOLLIN | EPOLLRDHUP;
+ } else if (sidei == 1 && events & SPLICE_CONNECT) {
+ e = EPOLLOUT;
+ }
+
+ if (events & OUT_WAIT(sidei))
+ e |= EPOLLOUT;
+ if (events & OUT_WAIT(!sidei))
+ e &= ~EPOLLIN;
+
+ return e;
+}
+
+/**
+ * tcp_conn_epoll_events() - epoll events mask for given connection state
+ * @events: Current connection events
+ * @conn_flags: Connection flags
+ *
+ * Return: epoll events mask corresponding to implied connection state
+ */
+static uint32_t tcp_conn_epoll_events(uint8_t events, uint8_t conn_flags)
+{
+ if (!events)
+ return 0;
+
+ if (events & ESTABLISHED) {
+ if (events & TAP_FIN_SENT)
+ return EPOLLET;
+
+ if (conn_flags & STALLED) {
+ if (conn_flags & ACK_FROM_TAP_BLOCKS)
+ return EPOLLRDHUP | EPOLLET;
+
+ return EPOLLIN | EPOLLRDHUP | EPOLLET;
+ }
+
+ return EPOLLIN | EPOLLRDHUP;
+ }
+
+ if (events == TAP_SYN_RCVD)
+ return EPOLLOUT | EPOLLET | EPOLLRDHUP;
+
+ return EPOLLET | EPOLLRDHUP;
+}
+
/**
* flow_epoll_set() - Add or modify epoll registration for a flow socket
* @type: epoll type
* @f: Flow to register socket for
- * @events: epoll events to watch for
* @fd: File descriptor to register
* @sidei: Side index of the flow
*
* Return: 0 on success, -1 on error (from epoll_ctl())
*/
int flow_epoll_set(enum epoll_type type, const struct flow_common *f,
- uint32_t events, int fd, unsigned int sidei)
+ int fd, unsigned int sidei)
{
struct epoll_event ev;
union epoll_ref ref;
+ uint32_t events;
int epollfd;
int m;
@@ -413,8 +469,24 @@ int flow_epoll_set(enum epoll_type type, const struct flow_common *f,
ref.type = type;
switch (type) {
- case EPOLL_TYPE_TCP_SPLICE:
- case EPOLL_TYPE_TCP:
+ case EPOLL_TYPE_TCP_SPLICE: {
+ const struct tcp_splice_conn *conn = &((union flow *)f)->tcp_splice;
+
+ ref.flowside = flow_sidx(f, sidei);
+ if (flow_in_epoll(f)) {
+ m = EPOLL_CTL_MOD;
+ epollfd = flow_epollfd(f);
+ } else {
+ m = EPOLL_CTL_ADD;
+ epollfd = epoll_id_to_fd[EPOLLFD_ID_DEFAULT];
+ }
+
+ events = tcp_splice_conn_epoll_events(conn->events, sidei);
+ break;
+ }
+ case EPOLL_TYPE_TCP: {
+ const struct tcp_tap_conn *conn = &((union flow *)f)->tcp;
+
ref.flowside = flow_sidx(f, sidei);
if (flow_in_epoll(f)) {
m = EPOLL_CTL_MOD;
@@ -423,19 +495,24 @@ int flow_epoll_set(enum epoll_type type, const struct flow_common *f,
m = EPOLL_CTL_ADD;
epollfd = epoll_id_to_fd[EPOLLFD_ID_DEFAULT];
}
+
+ events = tcp_conn_epoll_events(conn->events, conn->flags);
break;
+ }
case EPOLL_TYPE_TCP_TIMER: {
const struct tcp_tap_conn *conn = &((union flow *)f)->tcp;
ref.flow = flow_idx(f);
m = conn->timer == -1 ? EPOLL_CTL_ADD : EPOLL_CTL_MOD;
epollfd = flow_epollfd(f);
+ events = EPOLLIN | EPOLLET;
break;
}
case EPOLL_TYPE_PING:
ref.flowside = flow_sidx(f, sidei);
m = EPOLL_CTL_ADD;
epollfd = flow_epollfd(f);
+ events = EPOLLIN;
break;
case EPOLL_TYPE_UDP: {
union {
@@ -446,6 +523,7 @@ int flow_epoll_set(enum epoll_type type, const struct flow_common *f,
ref.data = fref.data;
m = EPOLL_CTL_ADD;
epollfd = flow_epollfd(f);
+ events = EPOLLIN;
break;
}
default:
diff --git a/flow.h b/flow.h
index 9740ede8963e..ed1e16139a4d 100644
--- a/flow.h
+++ b/flow.h
@@ -266,7 +266,7 @@ int flow_epollfd(const struct flow_common *f);
void flow_epollid_set(struct flow_common *f, int epollid);
void flow_epollid_clear(struct flow_common *f);
int flow_epoll_set(enum epoll_type type, const struct flow_common *f,
- uint32_t events, int fd, unsigned int sidei);
+ int fd, unsigned int sidei);
void flow_epollid_register(int epollid, int epollfd);
void flow_defer_handler(const struct ctx *c, const struct timespec *now);
int flow_migrate_source_early(struct ctx *c, const struct migrate_stage *stage,
diff --git a/icmp.c b/icmp.c
index f0b6fcff1776..5487a904117d 100644
--- a/icmp.c
+++ b/icmp.c
@@ -210,7 +210,7 @@ static struct icmp_ping_flow *icmp_ping_new(const struct ctx *c,
goto cancel;
flow_epollid_set(&pingf->f, EPOLLFD_ID_DEFAULT);
- if (flow_epoll_set(EPOLL_TYPE_PING, &pingf->f, EPOLLIN, pingf->sock,
+ if (flow_epoll_set(EPOLL_TYPE_PING, &pingf->f, pingf->sock,
TGTSIDE) < 0) {
close(pingf->sock);
goto cancel;
diff --git a/tcp.c b/tcp.c
index ec4f6d2ecc04..2e8a7d516273 100644
--- a/tcp.c
+++ b/tcp.c
@@ -489,38 +489,6 @@ int tcp_set_peek_offset(const struct tcp_tap_conn *conn, int offset)
return 0;
}
-/**
- * tcp_conn_epoll_events() - epoll events mask for given connection state
- * @events: Current connection events
- * @conn_flags: Connection flags
- *
- * Return: epoll events mask corresponding to implied connection state
- */
-static uint32_t tcp_conn_epoll_events(uint8_t events, uint8_t conn_flags)
-{
- if (!events)
- return 0;
-
- if (events & ESTABLISHED) {
- if (events & TAP_FIN_SENT)
- return EPOLLET;
-
- if (conn_flags & STALLED) {
- if (conn_flags & ACK_FROM_TAP_BLOCKS)
- return EPOLLRDHUP | EPOLLET;
-
- return EPOLLIN | EPOLLRDHUP | EPOLLET;
- }
-
- return EPOLLIN | EPOLLRDHUP;
- }
-
- if (events == TAP_SYN_RCVD)
- return EPOLLOUT | EPOLLET | EPOLLRDHUP;
-
- return EPOLLET | EPOLLRDHUP;
-}
-
/**
* tcp_epoll_ctl() - Add/modify/delete epoll state from connection events
* @c: Execution context
@@ -530,8 +498,6 @@ static uint32_t tcp_conn_epoll_events(uint8_t events, uint8_t conn_flags)
*/
static int tcp_epoll_ctl(const struct ctx *c, struct tcp_tap_conn *conn)
{
- uint32_t events;
-
if (conn->events == CLOSED) {
int epollfd = flow_in_epoll(&conn->f) ? flow_epollfd(&conn->f) : c->epollfd;
if (flow_in_epoll(&conn->f))
@@ -541,9 +507,7 @@ static int tcp_epoll_ctl(const struct ctx *c, struct tcp_tap_conn *conn)
return 0;
}
- events = tcp_conn_epoll_events(conn->events, conn->flags);
-
- if (flow_epoll_set(EPOLL_TYPE_TCP, &conn->f, events, conn->sock,
+ if (flow_epoll_set(EPOLL_TYPE_TCP, &conn->f, conn->sock,
!TAPSIDE(conn)))
return -errno;
@@ -551,7 +515,7 @@ static int tcp_epoll_ctl(const struct ctx *c, struct tcp_tap_conn *conn)
if (conn->timer != -1) {
if (flow_epoll_set(EPOLL_TYPE_TCP_TIMER, &conn->f,
- EPOLLIN | EPOLLET, conn->timer, 0))
+ conn->timer, 0))
return -errno;
}
@@ -582,8 +546,7 @@ static void tcp_timer_ctl(const struct ctx *c, struct tcp_tap_conn *conn)
return;
}
- if (flow_epoll_set(EPOLL_TYPE_TCP_TIMER, &conn->f,
- EPOLLIN | EPOLLET, fd, 0)) {
+ if (flow_epoll_set(EPOLL_TYPE_TCP_TIMER, &conn->f, fd, 0)) {
flow_dbg_perror(conn, "failed to add timer");
close(fd);
return;
diff --git a/tcp_splice.c b/tcp_splice.c
index ccf627f4427d..1eeb678d534b 100644
--- a/tcp_splice.c
+++ b/tcp_splice.c
@@ -109,30 +109,6 @@ static struct tcp_splice_conn *conn_at_sidx(flow_sidx_t sidx)
return &flow->tcp_splice;
}
-/**
- * tcp_splice_conn_epoll_events() - epoll events masks for given state
- * @events: Connection event flags
- * @ev: Events to fill in, 0 is accepted socket, 1 is connecting socket
- */
-static uint32_t tcp_splice_conn_epoll_events(uint16_t events, unsigned sidei)
-{
- uint32_t e = 0;
-
- if (events & SPLICE_ESTABLISHED) {
- if (!(events & FIN_SENT(!sidei)))
- e = EPOLLIN | EPOLLRDHUP;
- } else if (sidei == 1 && events & SPLICE_CONNECT) {
- e = EPOLLOUT;
- }
-
- if (events & OUT_WAIT(sidei))
- e |= EPOLLOUT;
- if (events & OUT_WAIT(!sidei))
- e &= ~EPOLLIN;
-
- return e;
-}
-
/**
* tcp_splice_epoll_ctl() - Add/modify/delete epoll state from connection events
* @conn: Connection pointer
@@ -141,15 +117,8 @@ static uint32_t tcp_splice_conn_epoll_events(uint16_t events, unsigned sidei)
*/
static int tcp_splice_epoll_ctl(struct tcp_splice_conn *conn)
{
- uint32_t events[2];
-
- events[0] = tcp_splice_conn_epoll_events(conn->events, 0);
- events[1] = tcp_splice_conn_epoll_events(conn->events, 1);
-
- if (flow_epoll_set(EPOLL_TYPE_TCP_SPLICE, &conn->f, events[0],
- conn->s[0], 0) ||
- flow_epoll_set(EPOLL_TYPE_TCP_SPLICE, &conn->f, events[1],
- conn->s[1], 1)) {
+ if (flow_epoll_set(EPOLL_TYPE_TCP_SPLICE, &conn->f, conn->s[0], 0) ||
+ flow_epoll_set(EPOLL_TYPE_TCP_SPLICE, &conn->f, conn->s[1], 1)) {
int ret = -errno;
flow_perror(conn, "ERROR on epoll_ctl()");
return ret;
diff --git a/udp_flow.c b/udp_flow.c
index 07ff589670c1..afacfb356261 100644
--- a/udp_flow.c
+++ b/udp_flow.c
@@ -84,7 +84,7 @@ static int udp_flow_sock(const struct ctx *c,
}
flow_epollid_set(&uflow->f, EPOLLFD_ID_DEFAULT);
- rc = flow_epoll_set(EPOLL_TYPE_UDP, &uflow->f, EPOLLIN, s, sidei);
+ rc = flow_epoll_set(EPOLL_TYPE_UDP, &uflow->f, s, sidei);
if (rc < 0) {
close(s);
return rc;
--
2.51.1
^ permalink raw reply [flat|nested] 8+ messages in thread* [PATCH 7/7] flow: Have flow_epoll_set() retrieve file descriptor from flow structure
2025-12-19 16:45 [PATCH 0/7] flow: Introduce flow_epoll_set() to centralize epoll operations Laurent Vivier
` (5 preceding siblings ...)
2025-12-19 16:45 ` [PATCH 6/7] flow: Compute epoll events inside flow_epoll_set() Laurent Vivier
@ 2025-12-19 16:45 ` Laurent Vivier
6 siblings, 0 replies; 8+ messages in thread
From: Laurent Vivier @ 2025-12-19 16:45 UTC (permalink / raw)
To: passt-dev; +Cc: Laurent Vivier
Currently, callers of flow_epoll_set() pass the file descriptor as an
explicit parameter. However, the function already has access to the flow
structure and knows the epoll type, so it can retrieve the appropriate fd
itself.
Remove the fd parameter and have flow_epoll_set() extract the file
descriptor directly from the flow based on the type:
- EPOLL_TYPE_TCP_SPLICE: conn->s[sidei]
- EPOLL_TYPE_TCP: conn->sock
- EPOLL_TYPE_TCP_TIMER: conn->timer
- EPOLL_TYPE_PING: pingf->sock
- EPOLL_TYPE_UDP: uflow->s[sidei]
For TCP timers, the previous logic determined ADD vs MOD by checking
whether conn->timer was -1. Since the timer fd must now be assigned to the
flow before calling flow_epoll_set(), this check would no longer work.
Instead, introduce FLOW_EPOLL_TIMER_ADD and FLOW_EPOLL_TIMER_MOD constants
and have callers specify the operation explicitly via the sidei parameter.
This requires callers to assign the socket or timer fd to the flow
structure before calling flow_epoll_set(), with appropriate cleanup
(resetting to -1) if the epoll registration fails.
Signed-off-by: Laurent Vivier <lvivier@redhat.com>
---
flow.c | 26 +++++++++++++++++++-------
flow.h | 4 +++-
icmp.c | 3 +--
tcp.c | 12 ++++++------
tcp_splice.c | 4 ++--
udp_flow.c | 6 ++++--
6 files changed, 35 insertions(+), 20 deletions(-)
diff --git a/flow.c b/flow.c
index 6b9ccca6ef50..7d6f9f2bf1a9 100644
--- a/flow.c
+++ b/flow.c
@@ -451,13 +451,14 @@ static uint32_t tcp_conn_epoll_events(uint8_t events, uint8_t conn_flags)
* flow_epoll_set() - Add or modify epoll registration for a flow socket
* @type: epoll type
* @f: Flow to register socket for
- * @fd: File descriptor to register
- * @sidei: Side index of the flow
+ * @sidei: Side index of the flow (for most types), or for EPOLL_TYPE_TCP_TIMER:
+ * FLOW_EPOLL_TIMER_ADD to add the timer to epoll,
+ * FLOW_EPOLL_TIMER_MOD to modify an existing registration
*
* Return: 0 on success, -1 on error (from epoll_ctl())
*/
int flow_epoll_set(enum epoll_type type, const struct flow_common *f,
- int fd, unsigned int sidei)
+ unsigned int sidei)
{
struct epoll_event ev;
union epoll_ref ref;
@@ -465,13 +466,13 @@ int flow_epoll_set(enum epoll_type type, const struct flow_common *f,
int epollfd;
int m;
- ref.fd = fd;
ref.type = type;
switch (type) {
case EPOLL_TYPE_TCP_SPLICE: {
const struct tcp_splice_conn *conn = &((union flow *)f)->tcp_splice;
+ ref.fd = conn->s[sidei];
ref.flowside = flow_sidx(f, sidei);
if (flow_in_epoll(f)) {
m = EPOLL_CTL_MOD;
@@ -487,6 +488,7 @@ int flow_epoll_set(enum epoll_type type, const struct flow_common *f,
case EPOLL_TYPE_TCP: {
const struct tcp_tap_conn *conn = &((union flow *)f)->tcp;
+ ref.fd = conn->sock;
ref.flowside = flow_sidx(f, sidei);
if (flow_in_epoll(f)) {
m = EPOLL_CTL_MOD;
@@ -502,24 +504,34 @@ int flow_epoll_set(enum epoll_type type, const struct flow_common *f,
case EPOLL_TYPE_TCP_TIMER: {
const struct tcp_tap_conn *conn = &((union flow *)f)->tcp;
+ ref.fd = conn->timer;
ref.flow = flow_idx(f);
- m = conn->timer == -1 ? EPOLL_CTL_ADD : EPOLL_CTL_MOD;
+ if (sidei == FLOW_EPOLL_TIMER_ADD)
+ m = EPOLL_CTL_ADD;
+ else
+ m = EPOLL_CTL_MOD;
epollfd = flow_epollfd(f);
events = EPOLLIN | EPOLLET;
break;
}
- case EPOLL_TYPE_PING:
+ case EPOLL_TYPE_PING: {
+ const struct icmp_ping_flow *pingf = &((union flow *)f)->ping;
+
+ ref.fd = pingf->sock;
ref.flowside = flow_sidx(f, sidei);
m = EPOLL_CTL_ADD;
epollfd = flow_epollfd(f);
events = EPOLLIN;
break;
+ }
case EPOLL_TYPE_UDP: {
+ const struct udp_flow *uflow = &((union flow *)f)->udp;
union {
flow_sidx_t sidx;
uint32_t data;
} fref = { .sidx = flow_sidx(f, sidei) };
+ ref.fd = uflow->s[sidei];
ref.data = fref.data;
m = EPOLL_CTL_ADD;
epollfd = flow_epollfd(f);
@@ -533,7 +545,7 @@ int flow_epoll_set(enum epoll_type type, const struct flow_common *f,
ev.events = events;
ev.data.u64 = ref.u64;
- return epoll_ctl(epollfd, m, fd, &ev);
+ return epoll_ctl(epollfd, m, ref.fd, &ev);
}
/**
diff --git a/flow.h b/flow.h
index ed1e16139a4d..3a8224dba3b0 100644
--- a/flow.h
+++ b/flow.h
@@ -266,7 +266,9 @@ int flow_epollfd(const struct flow_common *f);
void flow_epollid_set(struct flow_common *f, int epollid);
void flow_epollid_clear(struct flow_common *f);
int flow_epoll_set(enum epoll_type type, const struct flow_common *f,
- int fd, unsigned int sidei);
+ unsigned int sidei);
+#define FLOW_EPOLL_TIMER_MOD 0
+#define FLOW_EPOLL_TIMER_ADD 1
void flow_epollid_register(int epollid, int epollfd);
void flow_defer_handler(const struct ctx *c, const struct timespec *now);
int flow_migrate_source_early(struct ctx *c, const struct migrate_stage *stage,
diff --git a/icmp.c b/icmp.c
index 5487a904117d..2f2fb56ed726 100644
--- a/icmp.c
+++ b/icmp.c
@@ -210,8 +210,7 @@ static struct icmp_ping_flow *icmp_ping_new(const struct ctx *c,
goto cancel;
flow_epollid_set(&pingf->f, EPOLLFD_ID_DEFAULT);
- if (flow_epoll_set(EPOLL_TYPE_PING, &pingf->f, pingf->sock,
- TGTSIDE) < 0) {
+ if (flow_epoll_set(EPOLL_TYPE_PING, &pingf->f, TGTSIDE) < 0) {
close(pingf->sock);
goto cancel;
}
diff --git a/tcp.c b/tcp.c
index 2e8a7d516273..730da457081c 100644
--- a/tcp.c
+++ b/tcp.c
@@ -507,15 +507,14 @@ static int tcp_epoll_ctl(const struct ctx *c, struct tcp_tap_conn *conn)
return 0;
}
- if (flow_epoll_set(EPOLL_TYPE_TCP, &conn->f, conn->sock,
- !TAPSIDE(conn)))
+ if (flow_epoll_set(EPOLL_TYPE_TCP, &conn->f, !TAPSIDE(conn)))
return -errno;
flow_epollid_set(&conn->f, EPOLLFD_ID_DEFAULT);
if (conn->timer != -1) {
if (flow_epoll_set(EPOLL_TYPE_TCP_TIMER, &conn->f,
- conn->timer, 0))
+ FLOW_EPOLL_TIMER_MOD))
return -errno;
}
@@ -546,13 +545,14 @@ static void tcp_timer_ctl(const struct ctx *c, struct tcp_tap_conn *conn)
return;
}
- if (flow_epoll_set(EPOLL_TYPE_TCP_TIMER, &conn->f, fd, 0)) {
+ conn->timer = fd;
+ if (flow_epoll_set(EPOLL_TYPE_TCP_TIMER, &conn->f,
+ FLOW_EPOLL_TIMER_ADD)) {
flow_dbg_perror(conn, "failed to add timer");
close(fd);
+ conn->timer = -1;
return;
}
-
- conn->timer = fd;
}
if (conn->flags & ACK_TO_TAP_DUE) {
diff --git a/tcp_splice.c b/tcp_splice.c
index 1eeb678d534b..964a559782a3 100644
--- a/tcp_splice.c
+++ b/tcp_splice.c
@@ -117,8 +117,8 @@ static struct tcp_splice_conn *conn_at_sidx(flow_sidx_t sidx)
*/
static int tcp_splice_epoll_ctl(struct tcp_splice_conn *conn)
{
- if (flow_epoll_set(EPOLL_TYPE_TCP_SPLICE, &conn->f, conn->s[0], 0) ||
- flow_epoll_set(EPOLL_TYPE_TCP_SPLICE, &conn->f, conn->s[1], 1)) {
+ if (flow_epoll_set(EPOLL_TYPE_TCP_SPLICE, &conn->f, 0) ||
+ flow_epoll_set(EPOLL_TYPE_TCP_SPLICE, &conn->f, 1)) {
int ret = -errno;
flow_perror(conn, "ERROR on epoll_ctl()");
return ret;
diff --git a/udp_flow.c b/udp_flow.c
index afacfb356261..9a67ad701c34 100644
--- a/udp_flow.c
+++ b/udp_flow.c
@@ -83,10 +83,12 @@ static int udp_flow_sock(const struct ctx *c,
return s;
}
+ uflow->s[sidei] = s;
flow_epollid_set(&uflow->f, EPOLLFD_ID_DEFAULT);
- rc = flow_epoll_set(EPOLL_TYPE_UDP, &uflow->f, s, sidei);
+ rc = flow_epoll_set(EPOLL_TYPE_UDP, &uflow->f, sidei);
if (rc < 0) {
close(s);
+ uflow->s[sidei] = -1;
return rc;
}
@@ -95,11 +97,11 @@ static int udp_flow_sock(const struct ctx *c,
epoll_del(flow_epollfd(&uflow->f), s);
close(s);
+ uflow->s[sidei] = -1;
flow_dbg_perror(uflow, "Couldn't connect flow socket");
return rc;
}
- uflow->s[sidei] = s;
/* It's possible, if unlikely, that we could receive some packets in
* between the bind() and connect() which may or may not be for this
--
2.51.1
^ permalink raw reply [flat|nested] 8+ messages in thread