* [PATCH 1/3] tcp_splice: Register fds with epoll at flow creation
2026-01-16 15:52 [PATCH 0/3] Register TCP flows with epoll at creation time Laurent Vivier
@ 2026-01-16 15:52 ` Laurent Vivier
2026-01-19 4:45 ` David Gibson
2026-01-16 15:52 ` [PATCH 2/3] tcp: " Laurent Vivier
2026-01-16 15:52 ` [PATCH 3/3] flow: Remove EPOLLFD_ID_INVALID Laurent Vivier
2 siblings, 1 reply; 10+ messages in thread
From: Laurent Vivier @ 2026-01-16 15:52 UTC (permalink / raw)
To: passt-dev; +Cc: Laurent Vivier
Register both splice connection sockets with epoll using empty events
(events=0) in tcp_splice_connect(), before initiating the connection.
This allows tcp_splice_epoll_ctl() to always use EPOLL_CTL_MOD, removing
the need to check whether fds are already registered. As a result, the
conditional ADD/MOD logic is no longer needed, simplifying the function.
Signed-off-by: Laurent Vivier <lvivier@redhat.com>
---
tcp_splice.c | 20 ++++++++++----------
1 file changed, 10 insertions(+), 10 deletions(-)
diff --git a/tcp_splice.c b/tcp_splice.c
index a7c04ca8652a..cb81e012ee4b 100644
--- a/tcp_splice.c
+++ b/tcp_splice.c
@@ -142,20 +142,12 @@ static uint32_t tcp_splice_conn_epoll_events(uint16_t events, unsigned sidei)
static int tcp_splice_epoll_ctl(struct tcp_splice_conn *conn)
{
uint32_t events[2];
- int m;
-
- if (flow_in_epoll(&conn->f)) {
- m = EPOLL_CTL_MOD;
- } else {
- flow_epollid_set(&conn->f, EPOLLFD_ID_DEFAULT);
- m = EPOLL_CTL_ADD;
- }
events[0] = tcp_splice_conn_epoll_events(conn->events, 0);
events[1] = tcp_splice_conn_epoll_events(conn->events, 1);
- if (flow_epoll_set(&conn->f, m, events[0], conn->s[0], 0) ||
- flow_epoll_set(&conn->f, m, events[1], conn->s[1], 1)) {
+ if (flow_epoll_set(&conn->f, EPOLL_CTL_MOD, events[0], conn->s[0], 0) ||
+ flow_epoll_set(&conn->f, EPOLL_CTL_MOD, events[1], conn->s[1], 1)) {
int ret = -errno;
flow_perror(conn, "ERROR on epoll_ctl()");
return ret;
@@ -368,6 +360,14 @@ static int tcp_splice_connect(const struct ctx *c, struct tcp_splice_conn *conn)
pif_sockaddr(c, &sa, tgtpif, &tgt->eaddr, tgt->eport);
+ flow_epollid_set(&conn->f, EPOLLFD_ID_DEFAULT);
+ if (flow_epoll_set(&conn->f, EPOLL_CTL_ADD, 0, conn->s[0], 0) ||
+ flow_epoll_set(&conn->f, EPOLL_CTL_ADD, 0, conn->s[1], 1)) {
+ int ret = -errno;
+ flow_perror(conn, "Cannot register to epollfd");
+ return ret;
+ }
+
conn_event(conn, SPLICE_CONNECT);
if (connect(conn->s[1], &sa.sa, socklen_inany(&sa))) {
--
2.52.0
^ permalink raw reply [flat|nested] 10+ messages in thread* Re: [PATCH 1/3] tcp_splice: Register fds with epoll at flow creation
2026-01-16 15:52 ` [PATCH 1/3] tcp_splice: Register fds with epoll at flow creation Laurent Vivier
@ 2026-01-19 4:45 ` David Gibson
2026-01-19 8:10 ` Laurent Vivier
0 siblings, 1 reply; 10+ messages in thread
From: David Gibson @ 2026-01-19 4:45 UTC (permalink / raw)
To: Laurent Vivier; +Cc: passt-dev
[-- Attachment #1: Type: text/plain, Size: 2562 bytes --]
On Fri, Jan 16, 2026 at 04:52:21PM +0100, Laurent Vivier wrote:
> Register both splice connection sockets with epoll using empty events
> (events=0) in tcp_splice_connect(), before initiating the connection.
>
> This allows tcp_splice_epoll_ctl() to always use EPOLL_CTL_MOD, removing
> the need to check whether fds are already registered. As a result, the
> conditional ADD/MOD logic is no longer needed, simplifying the function.
>
> Signed-off-by: Laurent Vivier <lvivier@redhat.com>
Nice! One query below.
> ---
> tcp_splice.c | 20 ++++++++++----------
> 1 file changed, 10 insertions(+), 10 deletions(-)
>
> diff --git a/tcp_splice.c b/tcp_splice.c
> index a7c04ca8652a..cb81e012ee4b 100644
> --- a/tcp_splice.c
> +++ b/tcp_splice.c
> @@ -142,20 +142,12 @@ static uint32_t tcp_splice_conn_epoll_events(uint16_t events, unsigned sidei)
> static int tcp_splice_epoll_ctl(struct tcp_splice_conn *conn)
> {
> uint32_t events[2];
> - int m;
> -
> - if (flow_in_epoll(&conn->f)) {
> - m = EPOLL_CTL_MOD;
> - } else {
> - flow_epollid_set(&conn->f, EPOLLFD_ID_DEFAULT);
> - m = EPOLL_CTL_ADD;
> - }
>
> events[0] = tcp_splice_conn_epoll_events(conn->events, 0);
> events[1] = tcp_splice_conn_epoll_events(conn->events, 1);
>
> - if (flow_epoll_set(&conn->f, m, events[0], conn->s[0], 0) ||
> - flow_epoll_set(&conn->f, m, events[1], conn->s[1], 1)) {
> + if (flow_epoll_set(&conn->f, EPOLL_CTL_MOD, events[0], conn->s[0], 0) ||
> + flow_epoll_set(&conn->f, EPOLL_CTL_MOD, events[1], conn->s[1], 1)) {
> int ret = -errno;
> flow_perror(conn, "ERROR on epoll_ctl()");
> return ret;
> @@ -368,6 +360,14 @@ static int tcp_splice_connect(const struct ctx *c, struct tcp_splice_conn *conn)
>
> pif_sockaddr(c, &sa, tgtpif, &tgt->eaddr, tgt->eport);
>
> + flow_epollid_set(&conn->f, EPOLLFD_ID_DEFAULT);
> + if (flow_epoll_set(&conn->f, EPOLL_CTL_ADD, 0, conn->s[0], 0) ||
> + flow_epoll_set(&conn->f, EPOLL_CTL_ADD, 0, conn->s[1], 1)) {
> + int ret = -errno;
> + flow_perror(conn, "Cannot register to epollfd");
> + return ret;
Do we need to worry about rollback here, if the first one succeeds,
but the second one fails?
> + }
> +
> conn_event(conn, SPLICE_CONNECT);
>
> if (connect(conn->s[1], &sa.sa, socklen_inany(&sa))) {
> --
> 2.52.0
>
--
David Gibson (he or they) | I'll have my music baroque, and my code
david AT gibson.dropbear.id.au | minimalist, thank you, not the other way
| around.
http://www.ozlabs.org/~dgibson
[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]
^ permalink raw reply [flat|nested] 10+ messages in thread* Re: [PATCH 1/3] tcp_splice: Register fds with epoll at flow creation
2026-01-19 4:45 ` David Gibson
@ 2026-01-19 8:10 ` Laurent Vivier
2026-01-19 8:36 ` David Gibson
0 siblings, 1 reply; 10+ messages in thread
From: Laurent Vivier @ 2026-01-19 8:10 UTC (permalink / raw)
To: David Gibson; +Cc: passt-dev
On 1/19/26 05:45, David Gibson wrote:
> On Fri, Jan 16, 2026 at 04:52:21PM +0100, Laurent Vivier wrote:
>> Register both splice connection sockets with epoll using empty events
>> (events=0) in tcp_splice_connect(), before initiating the connection.
>>
>> This allows tcp_splice_epoll_ctl() to always use EPOLL_CTL_MOD, removing
>> the need to check whether fds are already registered. As a result, the
>> conditional ADD/MOD logic is no longer needed, simplifying the function.
>>
>> Signed-off-by: Laurent Vivier <lvivier@redhat.com>
>
> Nice! One query below.
>
>> ---
>> tcp_splice.c | 20 ++++++++++----------
>> 1 file changed, 10 insertions(+), 10 deletions(-)
>>
>> diff --git a/tcp_splice.c b/tcp_splice.c
>> index a7c04ca8652a..cb81e012ee4b 100644
>> --- a/tcp_splice.c
>> +++ b/tcp_splice.c
>> @@ -142,20 +142,12 @@ static uint32_t tcp_splice_conn_epoll_events(uint16_t events, unsigned sidei)
>> static int tcp_splice_epoll_ctl(struct tcp_splice_conn *conn)
>> {
>> uint32_t events[2];
>> - int m;
>> -
>> - if (flow_in_epoll(&conn->f)) {
>> - m = EPOLL_CTL_MOD;
>> - } else {
>> - flow_epollid_set(&conn->f, EPOLLFD_ID_DEFAULT);
>> - m = EPOLL_CTL_ADD;
>> - }
>>
>> events[0] = tcp_splice_conn_epoll_events(conn->events, 0);
>> events[1] = tcp_splice_conn_epoll_events(conn->events, 1);
>>
>> - if (flow_epoll_set(&conn->f, m, events[0], conn->s[0], 0) ||
>> - flow_epoll_set(&conn->f, m, events[1], conn->s[1], 1)) {
>> + if (flow_epoll_set(&conn->f, EPOLL_CTL_MOD, events[0], conn->s[0], 0) ||
>> + flow_epoll_set(&conn->f, EPOLL_CTL_MOD, events[1], conn->s[1], 1)) {
>> int ret = -errno;
>> flow_perror(conn, "ERROR on epoll_ctl()");
>> return ret;
>> @@ -368,6 +360,14 @@ static int tcp_splice_connect(const struct ctx *c, struct tcp_splice_conn *conn)
>>
>> pif_sockaddr(c, &sa, tgtpif, &tgt->eaddr, tgt->eport);
>>
>> + flow_epollid_set(&conn->f, EPOLLFD_ID_DEFAULT);
>> + if (flow_epoll_set(&conn->f, EPOLL_CTL_ADD, 0, conn->s[0], 0) ||
>> + flow_epoll_set(&conn->f, EPOLL_CTL_ADD, 0, conn->s[1], 1)) {
>> + int ret = -errno;
>> + flow_perror(conn, "Cannot register to epollfd");
>> + return ret;
>
> Do we need to worry about rollback here, if the first one succeeds,
> but the second one fails?
If we return an error here, tcp_splice_conn_from_sock() sets the CLOSING flag on the
connection and conn_flag() handles the closing flag by calling epoll_del() for both
sockets. So the cleanup path handles this case already.
None of the error cases in tcp_splice_connect() worries about rollback, so it's simpler to
do the same.
Thanks,
Laurent
>
>> + }
>> +
>> conn_event(conn, SPLICE_CONNECT);
>>
>> if (connect(conn->s[1], &sa.sa, socklen_inany(&sa))) {
>> --
>> 2.52.0
>>
>
^ permalink raw reply [flat|nested] 10+ messages in thread* Re: [PATCH 1/3] tcp_splice: Register fds with epoll at flow creation
2026-01-19 8:10 ` Laurent Vivier
@ 2026-01-19 8:36 ` David Gibson
0 siblings, 0 replies; 10+ messages in thread
From: David Gibson @ 2026-01-19 8:36 UTC (permalink / raw)
To: Laurent Vivier; +Cc: passt-dev
[-- Attachment #1: Type: text/plain, Size: 3157 bytes --]
On Mon, Jan 19, 2026 at 09:10:42AM +0100, Laurent Vivier wrote:
> On 1/19/26 05:45, David Gibson wrote:
> > On Fri, Jan 16, 2026 at 04:52:21PM +0100, Laurent Vivier wrote:
> > > Register both splice connection sockets with epoll using empty events
> > > (events=0) in tcp_splice_connect(), before initiating the connection.
> > >
> > > This allows tcp_splice_epoll_ctl() to always use EPOLL_CTL_MOD, removing
> > > the need to check whether fds are already registered. As a result, the
> > > conditional ADD/MOD logic is no longer needed, simplifying the function.
> > >
> > > Signed-off-by: Laurent Vivier <lvivier@redhat.com>
> >
> > Nice! One query below.
> >
> > > ---
> > > tcp_splice.c | 20 ++++++++++----------
> > > 1 file changed, 10 insertions(+), 10 deletions(-)
> > >
> > > diff --git a/tcp_splice.c b/tcp_splice.c
> > > index a7c04ca8652a..cb81e012ee4b 100644
> > > --- a/tcp_splice.c
> > > +++ b/tcp_splice.c
> > > @@ -142,20 +142,12 @@ static uint32_t tcp_splice_conn_epoll_events(uint16_t events, unsigned sidei)
> > > static int tcp_splice_epoll_ctl(struct tcp_splice_conn *conn)
> > > {
> > > uint32_t events[2];
> > > - int m;
> > > -
> > > - if (flow_in_epoll(&conn->f)) {
> > > - m = EPOLL_CTL_MOD;
> > > - } else {
> > > - flow_epollid_set(&conn->f, EPOLLFD_ID_DEFAULT);
> > > - m = EPOLL_CTL_ADD;
> > > - }
> > > events[0] = tcp_splice_conn_epoll_events(conn->events, 0);
> > > events[1] = tcp_splice_conn_epoll_events(conn->events, 1);
> > > - if (flow_epoll_set(&conn->f, m, events[0], conn->s[0], 0) ||
> > > - flow_epoll_set(&conn->f, m, events[1], conn->s[1], 1)) {
> > > + if (flow_epoll_set(&conn->f, EPOLL_CTL_MOD, events[0], conn->s[0], 0) ||
> > > + flow_epoll_set(&conn->f, EPOLL_CTL_MOD, events[1], conn->s[1], 1)) {
> > > int ret = -errno;
> > > flow_perror(conn, "ERROR on epoll_ctl()");
> > > return ret;
> > > @@ -368,6 +360,14 @@ static int tcp_splice_connect(const struct ctx *c, struct tcp_splice_conn *conn)
> > > pif_sockaddr(c, &sa, tgtpif, &tgt->eaddr, tgt->eport);
> > > + flow_epollid_set(&conn->f, EPOLLFD_ID_DEFAULT);
> > > + if (flow_epoll_set(&conn->f, EPOLL_CTL_ADD, 0, conn->s[0], 0) ||
> > > + flow_epoll_set(&conn->f, EPOLL_CTL_ADD, 0, conn->s[1], 1)) {
> > > + int ret = -errno;
> > > + flow_perror(conn, "Cannot register to epollfd");
> > > + return ret;
> >
> > Do we need to worry about rollback here, if the first one succeeds,
> > but the second one fails?
>
> If we return an error here, tcp_splice_conn_from_sock() sets the CLOSING
> flag on the connection and conn_flag() handles the closing flag by calling
> epoll_del() for both sockets. So the cleanup path handles this case already.
>
> None of the error cases in tcp_splice_connect() worries about rollback, so
> it's simpler to do the same.
Ok, that makes sense. Maybe add that explanation to the commit
message?
--
David Gibson (he or they) | I'll have my music baroque, and my code
david AT gibson.dropbear.id.au | minimalist, thank you, not the other way
| around.
http://www.ozlabs.org/~dgibson
[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]
^ permalink raw reply [flat|nested] 10+ messages in thread
* [PATCH 2/3] tcp: Register fds with epoll at flow creation
2026-01-16 15:52 [PATCH 0/3] Register TCP flows with epoll at creation time Laurent Vivier
2026-01-16 15:52 ` [PATCH 1/3] tcp_splice: Register fds with epoll at flow creation Laurent Vivier
@ 2026-01-16 15:52 ` Laurent Vivier
2026-01-19 4:48 ` David Gibson
2026-01-16 15:52 ` [PATCH 3/3] flow: Remove EPOLLFD_ID_INVALID Laurent Vivier
2 siblings, 1 reply; 10+ messages in thread
From: Laurent Vivier @ 2026-01-16 15:52 UTC (permalink / raw)
To: passt-dev; +Cc: Laurent Vivier
Register connection sockets with epoll using empty events
(events=0) in tcp_conn_from_tap(), tcp_tap_conn_from_sock()
and tcp_flow_repair_socket().
This allows tcp_epoll_ctl() to always use EPOLL_CTL_MOD, removing
the need to check whether fds are already registered. As a result, the
conditional ADD/MOD logic is no longer needed, simplifying the function.
Signed-off-by: Laurent Vivier <lvivier@redhat.com>
---
flow.c | 1 +
tcp.c | 36 ++++++++++++++----------------------
2 files changed, 15 insertions(+), 22 deletions(-)
diff --git a/flow.c b/flow.c
index cefe6c8b5b24..532339ce7fe1 100644
--- a/flow.c
+++ b/flow.c
@@ -357,6 +357,7 @@ static void flow_set_state(struct flow_common *f, enum flow_state state)
*
* Return: true if flow is registered with epoll, false otherwise
*/
+/* cppcheck-suppress unusedFunction */
bool flow_in_epoll(const struct flow_common *f)
{
return f->epollid != EPOLLFD_ID_INVALID;
diff --git a/tcp.c b/tcp.c
index 1db861705ddb..d9bca041dea8 100644
--- a/tcp.c
+++ b/tcp.c
@@ -528,37 +528,22 @@ static uint32_t tcp_conn_epoll_events(uint8_t events, uint8_t conn_flags)
static int tcp_epoll_ctl(struct tcp_tap_conn *conn)
{
uint32_t events;
- int m;
if (conn->events == CLOSED) {
- if (flow_in_epoll(&conn->f)) {
- int epollfd = flow_epollfd(&conn->f);
+ int epollfd = flow_epollfd(&conn->f);
- epoll_del(epollfd, conn->sock);
- if (conn->timer != -1)
- epoll_del(epollfd, conn->timer);
- }
+ epoll_del(epollfd, conn->sock);
+ if (conn->timer != -1)
+ epoll_del(epollfd, conn->timer);
return 0;
}
events = tcp_conn_epoll_events(conn->events, conn->flags);
- if (flow_in_epoll(&conn->f)) {
- m = EPOLL_CTL_MOD;
- } else {
- flow_epollid_set(&conn->f, EPOLLFD_ID_DEFAULT);
- m = EPOLL_CTL_ADD;
- }
-
- if (flow_epoll_set(&conn->f, m, events, conn->sock,
- !TAPSIDE(conn)) < 0) {
- int ret = -errno;
-
- if (m == EPOLL_CTL_ADD)
- flow_epollid_clear(&conn->f);
- return ret;
- }
+ if (flow_epoll_set(&conn->f, EPOLL_CTL_MOD, events, conn->sock,
+ !TAPSIDE(conn)) < 0)
+ return -errno;
return 0;
}
@@ -1710,6 +1695,8 @@ static void tcp_conn_from_tap(const struct ctx *c, sa_family_t af,
conn->sock = s;
conn->timer = -1;
conn->listening_sock = -1;
+ flow_epollid_set(&conn->f, EPOLLFD_ID_DEFAULT);
+ flow_epoll_set(&conn->f, EPOLL_CTL_ADD, 0, s, !TAPSIDE(conn));
conn_event(c, conn, TAP_SYN_RCVD);
conn->wnd_to_tap = WINDOW_DEFAULT;
@@ -2433,6 +2420,8 @@ static void tcp_tap_conn_from_sock(const struct ctx *c, union flow *flow,
conn->sock = s;
conn->timer = -1;
conn->ws_to_tap = conn->ws_from_tap = 0;
+ flow_epollid_set(&conn->f, EPOLLFD_ID_DEFAULT);
+ flow_epoll_set(&conn->f, EPOLL_CTL_ADD, 0, s, !TAPSIDE(conn));
conn_event(c, conn, SOCK_ACCEPTED);
hash = flow_hash_insert(c, TAP_SIDX(conn));
@@ -3825,6 +3814,9 @@ int tcp_flow_migrate_target(struct ctx *c, int fd)
return 0;
}
+ flow_epollid_set(&conn->f, EPOLLFD_ID_DEFAULT);
+ flow_epoll_set(&conn->f, EPOLL_CTL_ADD, 0, conn->sock, !TAPSIDE(conn));
+
flow_hash_insert(c, TAP_SIDX(conn));
FLOW_ACTIVATE(conn);
--
2.52.0
^ permalink raw reply [flat|nested] 10+ messages in thread* Re: [PATCH 2/3] tcp: Register fds with epoll at flow creation
2026-01-16 15:52 ` [PATCH 2/3] tcp: " Laurent Vivier
@ 2026-01-19 4:48 ` David Gibson
2026-01-19 8:56 ` Laurent Vivier
0 siblings, 1 reply; 10+ messages in thread
From: David Gibson @ 2026-01-19 4:48 UTC (permalink / raw)
To: Laurent Vivier; +Cc: passt-dev
[-- Attachment #1: Type: text/plain, Size: 3957 bytes --]
On Fri, Jan 16, 2026 at 04:52:22PM +0100, Laurent Vivier wrote:
> Register connection sockets with epoll using empty events
> (events=0) in tcp_conn_from_tap(), tcp_tap_conn_from_sock()
> and tcp_flow_repair_socket().
>
> This allows tcp_epoll_ctl() to always use EPOLL_CTL_MOD, removing
> the need to check whether fds are already registered. As a result, the
> conditional ADD/MOD logic is no longer needed, simplifying the function.
>
> Signed-off-by: Laurent Vivier <lvivier@redhat.com>
Couple of queries, but the concept looks good.
> ---
> flow.c | 1 +
> tcp.c | 36 ++++++++++++++----------------------
> 2 files changed, 15 insertions(+), 22 deletions(-)
>
> diff --git a/flow.c b/flow.c
> index cefe6c8b5b24..532339ce7fe1 100644
> --- a/flow.c
> +++ b/flow.c
> @@ -357,6 +357,7 @@ static void flow_set_state(struct flow_common *f, enum flow_state state)
> *
> * Return: true if flow is registered with epoll, false otherwise
> */
> +/* cppcheck-suppress unusedFunction */
> bool flow_in_epoll(const struct flow_common *f)
> {
> return f->epollid != EPOLLFD_ID_INVALID;
> diff --git a/tcp.c b/tcp.c
> index 1db861705ddb..d9bca041dea8 100644
> --- a/tcp.c
> +++ b/tcp.c
> @@ -528,37 +528,22 @@ static uint32_t tcp_conn_epoll_events(uint8_t events, uint8_t conn_flags)
> static int tcp_epoll_ctl(struct tcp_tap_conn *conn)
> {
> uint32_t events;
> - int m;
>
> if (conn->events == CLOSED) {
> - if (flow_in_epoll(&conn->f)) {
> - int epollfd = flow_epollfd(&conn->f);
> + int epollfd = flow_epollfd(&conn->f);
>
> - epoll_del(epollfd, conn->sock);
> - if (conn->timer != -1)
> - epoll_del(epollfd, conn->timer);
> - }
> + epoll_del(epollfd, conn->sock);
> + if (conn->timer != -1)
> + epoll_del(epollfd, conn->timer);
>
> return 0;
> }
>
> events = tcp_conn_epoll_events(conn->events, conn->flags);
>
> - if (flow_in_epoll(&conn->f)) {
> - m = EPOLL_CTL_MOD;
> - } else {
> - flow_epollid_set(&conn->f, EPOLLFD_ID_DEFAULT);
> - m = EPOLL_CTL_ADD;
> - }
> -
> - if (flow_epoll_set(&conn->f, m, events, conn->sock,
> - !TAPSIDE(conn)) < 0) {
> - int ret = -errno;
> -
> - if (m == EPOLL_CTL_ADD)
> - flow_epollid_clear(&conn->f);
> - return ret;
> - }
> + if (flow_epoll_set(&conn->f, EPOLL_CTL_MOD, events, conn->sock,
> + !TAPSIDE(conn)) < 0)
> + return -errno;
>
> return 0;
> }
> @@ -1710,6 +1695,8 @@ static void tcp_conn_from_tap(const struct ctx *c, sa_family_t af,
> conn->sock = s;
> conn->timer = -1;
> conn->listening_sock = -1;
> + flow_epollid_set(&conn->f, EPOLLFD_ID_DEFAULT);
> + flow_epoll_set(&conn->f, EPOLL_CTL_ADD, 0, s, !TAPSIDE(conn));
Do we need to handle errors here?
Because this is conn_from_tap(), we know that !TAPSIDE() will always
be TGTSIDE in this case.
> conn_event(c, conn, TAP_SYN_RCVD);
>
> conn->wnd_to_tap = WINDOW_DEFAULT;
> @@ -2433,6 +2420,8 @@ static void tcp_tap_conn_from_sock(const struct ctx *c, union flow *flow,
> conn->sock = s;
> conn->timer = -1;
> conn->ws_to_tap = conn->ws_from_tap = 0;
> + flow_epollid_set(&conn->f, EPOLLFD_ID_DEFAULT);
> + flow_epoll_set(&conn->f, EPOLL_CTL_ADD, 0, s, !TAPSIDE(conn));
Same comments as above, except here we know it's INISIDE.
> conn_event(c, conn, SOCK_ACCEPTED);
>
> hash = flow_hash_insert(c, TAP_SIDX(conn));
> @@ -3825,6 +3814,9 @@ int tcp_flow_migrate_target(struct ctx *c, int fd)
> return 0;
> }
>
> + flow_epollid_set(&conn->f, EPOLLFD_ID_DEFAULT);
> + flow_epoll_set(&conn->f, EPOLL_CTL_ADD, 0, conn->sock, !TAPSIDE(conn));
> +
> flow_hash_insert(c, TAP_SIDX(conn));
> FLOW_ACTIVATE(conn);
>
> --
> 2.52.0
>
--
David Gibson (he or they) | I'll have my music baroque, and my code
david AT gibson.dropbear.id.au | minimalist, thank you, not the other way
| around.
http://www.ozlabs.org/~dgibson
[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]
^ permalink raw reply [flat|nested] 10+ messages in thread* Re: [PATCH 2/3] tcp: Register fds with epoll at flow creation
2026-01-19 4:48 ` David Gibson
@ 2026-01-19 8:56 ` Laurent Vivier
0 siblings, 0 replies; 10+ messages in thread
From: Laurent Vivier @ 2026-01-19 8:56 UTC (permalink / raw)
To: David Gibson; +Cc: passt-dev
On 1/19/26 05:48, David Gibson wrote:
> On Fri, Jan 16, 2026 at 04:52:22PM +0100, Laurent Vivier wrote:
>> Register connection sockets with epoll using empty events
>> (events=0) in tcp_conn_from_tap(), tcp_tap_conn_from_sock()
>> and tcp_flow_repair_socket().
>>
>> This allows tcp_epoll_ctl() to always use EPOLL_CTL_MOD, removing
>> the need to check whether fds are already registered. As a result, the
>> conditional ADD/MOD logic is no longer needed, simplifying the function.
>>
>> Signed-off-by: Laurent Vivier <lvivier@redhat.com>
>
> Couple of queries, but the concept looks good.
>
>> ---
>> flow.c | 1 +
>> tcp.c | 36 ++++++++++++++----------------------
>> 2 files changed, 15 insertions(+), 22 deletions(-)
>>
>> diff --git a/flow.c b/flow.c
>> index cefe6c8b5b24..532339ce7fe1 100644
>> --- a/flow.c
>> +++ b/flow.c
>> @@ -357,6 +357,7 @@ static void flow_set_state(struct flow_common *f, enum flow_state state)
>> *
>> * Return: true if flow is registered with epoll, false otherwise
>> */
>> +/* cppcheck-suppress unusedFunction */
>> bool flow_in_epoll(const struct flow_common *f)
>> {
>> return f->epollid != EPOLLFD_ID_INVALID;
>> diff --git a/tcp.c b/tcp.c
>> index 1db861705ddb..d9bca041dea8 100644
>> --- a/tcp.c
>> +++ b/tcp.c
>> @@ -528,37 +528,22 @@ static uint32_t tcp_conn_epoll_events(uint8_t events, uint8_t conn_flags)
>> static int tcp_epoll_ctl(struct tcp_tap_conn *conn)
>> {
>> uint32_t events;
>> - int m;
>>
>> if (conn->events == CLOSED) {
>> - if (flow_in_epoll(&conn->f)) {
>> - int epollfd = flow_epollfd(&conn->f);
>> + int epollfd = flow_epollfd(&conn->f);
>>
>> - epoll_del(epollfd, conn->sock);
>> - if (conn->timer != -1)
>> - epoll_del(epollfd, conn->timer);
>> - }
>> + epoll_del(epollfd, conn->sock);
>> + if (conn->timer != -1)
>> + epoll_del(epollfd, conn->timer);
>>
>> return 0;
>> }
>>
>> events = tcp_conn_epoll_events(conn->events, conn->flags);
>>
>> - if (flow_in_epoll(&conn->f)) {
>> - m = EPOLL_CTL_MOD;
>> - } else {
>> - flow_epollid_set(&conn->f, EPOLLFD_ID_DEFAULT);
>> - m = EPOLL_CTL_ADD;
>> - }
>> -
>> - if (flow_epoll_set(&conn->f, m, events, conn->sock,
>> - !TAPSIDE(conn)) < 0) {
>> - int ret = -errno;
>> -
>> - if (m == EPOLL_CTL_ADD)
>> - flow_epollid_clear(&conn->f);
>> - return ret;
>> - }
>> + if (flow_epoll_set(&conn->f, EPOLL_CTL_MOD, events, conn->sock,
>> + !TAPSIDE(conn)) < 0)
>> + return -errno;
>>
>> return 0;
>> }
>> @@ -1710,6 +1695,8 @@ static void tcp_conn_from_tap(const struct ctx *c, sa_family_t af,
>> conn->sock = s;
>> conn->timer = -1;
>> conn->listening_sock = -1;
>> + flow_epollid_set(&conn->f, EPOLLFD_ID_DEFAULT);
>> + flow_epoll_set(&conn->f, EPOLL_CTL_ADD, 0, s, !TAPSIDE(conn));
>
> Do we need to handle errors here?
It's a good question. If EPOLL_CTL_ADD fails, the subsequent EPOLL_CTL_MOD called from
tcp_epoll_ctl() will fail, so we can rely on that to handle the error... but in
conn_event(), tcp_epoll_ctl() error is not handler.
So I think we need at least to handle the error here.
>
> Because this is conn_from_tap(), we know that !TAPSIDE() will always
> be TGTSIDE in this case.
>
>> conn_event(c, conn, TAP_SYN_RCVD);
>>
>> conn->wnd_to_tap = WINDOW_DEFAULT;
>> @@ -2433,6 +2420,8 @@ static void tcp_tap_conn_from_sock(const struct ctx *c, union flow *flow,
>> conn->sock = s;
>> conn->timer = -1;
>> conn->ws_to_tap = conn->ws_from_tap = 0;
>> + flow_epollid_set(&conn->f, EPOLLFD_ID_DEFAULT);
>> + flow_epoll_set(&conn->f, EPOLL_CTL_ADD, 0, s, !TAPSIDE(conn));
>
> Same comments as above, except here we know it's INISIDE.
Same answer.
>
>> conn_event(c, conn, SOCK_ACCEPTED);
>>
>> hash = flow_hash_insert(c, TAP_SIDX(conn));
>> @@ -3825,6 +3814,9 @@ int tcp_flow_migrate_target(struct ctx *c, int fd)
>> return 0;
>> }
>>
>> + flow_epollid_set(&conn->f, EPOLLFD_ID_DEFAULT);
>> + flow_epoll_set(&conn->f, EPOLL_CTL_ADD, 0, conn->sock, !TAPSIDE(conn));
>> +
>> flow_hash_insert(c, TAP_SIDX(conn));
>> FLOW_ACTIVATE(conn);
>>
>> --
>> 2.52.0
>>
>
^ permalink raw reply [flat|nested] 10+ messages in thread
* [PATCH 3/3] flow: Remove EPOLLFD_ID_INVALID
2026-01-16 15:52 [PATCH 0/3] Register TCP flows with epoll at creation time Laurent Vivier
2026-01-16 15:52 ` [PATCH 1/3] tcp_splice: Register fds with epoll at flow creation Laurent Vivier
2026-01-16 15:52 ` [PATCH 2/3] tcp: " Laurent Vivier
@ 2026-01-16 15:52 ` Laurent Vivier
2026-01-19 4:51 ` David Gibson
2 siblings, 1 reply; 10+ messages in thread
From: Laurent Vivier @ 2026-01-16 15:52 UTC (permalink / raw)
To: passt-dev; +Cc: Laurent Vivier
As all flows are now registered with an epollid at creation, we no
longer need to test if a flow is in epoll. Remove all related code
including flow_in_epoll() and flow_epollid_clear().
Signed-off-by: Laurent Vivier <lvivier@redhat.com>
---
flow.c | 35 ++---------------------------------
flow.h | 6 +-----
icmp.c | 1 -
tcp.c | 1 -
udp_flow.c | 1 -
5 files changed, 3 insertions(+), 41 deletions(-)
diff --git a/flow.c b/flow.c
index 532339ce7fe1..0c5503164714 100644
--- a/flow.c
+++ b/flow.c
@@ -127,7 +127,7 @@ static_assert(ARRAY_SIZE(flow_epoll) == FLOW_NUM_TYPES,
unsigned flow_first_free;
union flow flowtab[FLOW_MAX];
static const union flow *flow_new_entry; /* = NULL */
-static int epoll_id_to_fd[EPOLLFD_ID_MAX];
+static int epoll_id_to_fd[EPOLLFD_ID_SIZE];
/* Hash table to index it */
#define FLOW_HASH_LOAD 70 /* % */
@@ -351,18 +351,6 @@ static void flow_set_state(struct flow_common *f, enum flow_state state)
flow_log_details_(f, LOG_DEBUG, MAX(state, oldstate));
}
-/**
- * flow_in_epoll() - Check if flow is registered with an epoll instance
- * @f: Flow to check
- *
- * Return: true if flow is registered with epoll, false otherwise
- */
-/* cppcheck-suppress unusedFunction */
-bool flow_in_epoll(const struct flow_common *f)
-{
- return f->epollid != EPOLLFD_ID_INVALID;
-}
-
/**
* flow_epollfd() - Get the epoll file descriptor for a flow
* @f: Flow to query
@@ -371,13 +359,6 @@ bool flow_in_epoll(const struct flow_common *f)
*/
int flow_epollfd(const struct flow_common *f)
{
- if (f->epollid >= EPOLLFD_ID_MAX) {
- flow_log_(f, true, LOG_WARNING,
- "Invalid epollid %i for flow, assuming default",
- f->epollid);
- return epoll_id_to_fd[EPOLLFD_ID_DEFAULT];
- }
-
return epoll_id_to_fd[f->epollid];
}
@@ -388,20 +369,11 @@ int flow_epollfd(const struct flow_common *f)
*/
void flow_epollid_set(struct flow_common *f, int epollid)
{
- ASSERT(epollid < EPOLLFD_ID_MAX);
+ ASSERT(epollid < EPOLLFD_ID_SIZE);
f->epollid = epollid;
}
-/**
- * flow_epollid_clear() - Clear the flow epoll id
- * @f: Flow to update
- */
-void flow_epollid_clear(struct flow_common *f)
-{
- f->epollid = EPOLLFD_ID_INVALID;
-}
-
/**
* flow_epoll_set() - Add or modify epoll registration for a flow socket
* @f: Flow to register socket for
@@ -435,8 +407,6 @@ int flow_epoll_set(const struct flow_common *f, int command, uint32_t events,
*/
void flow_epollid_register(int epollid, int epollfd)
{
- ASSERT(epollid < EPOLLFD_ID_MAX);
-
epoll_id_to_fd[epollid] = epollfd;
}
@@ -643,7 +613,6 @@ union flow *flow_alloc(void)
flow_new_entry = flow;
memset(flow, 0, sizeof(*flow));
- flow_epollid_clear(&flow->f);
flow_set_state(&flow->f, FLOW_STATE_NEW);
return flow;
diff --git a/flow.h b/flow.h
index a74e13507957..d636358df422 100644
--- a/flow.h
+++ b/flow.h
@@ -178,7 +178,7 @@ int flowside_connect(const struct ctx *c, int s,
* @pif[]: Interface for each side of the flow
* @side[]: Information for each side of the flow
* @tap_omac: MAC address of remote endpoint as seen from the guest
- * @epollid: epollfd identifier, or EPOLLFD_ID_INVALID
+ * @epollid: epollfd identifier
*/
struct flow_common {
#ifdef __GNUC__
@@ -203,8 +203,6 @@ struct flow_common {
#define EPOLLFD_ID_DEFAULT 0
#define EPOLLFD_ID_SIZE (1 << EPOLLFD_ID_BITS)
-#define EPOLLFD_ID_MAX (EPOLLFD_ID_SIZE - 1)
-#define EPOLLFD_ID_INVALID EPOLLFD_ID_MAX
#define FLOW_INDEX_BITS 17 /* 128k - 1 */
#define FLOW_MAX MAX_FROM_BITS(FLOW_INDEX_BITS)
@@ -261,10 +259,8 @@ flow_sidx_t flow_lookup_sa(const struct ctx *c, uint8_t proto, uint8_t pif,
union flow;
void flow_init(void);
-bool flow_in_epoll(const struct flow_common *f);
int flow_epollfd(const struct flow_common *f);
void flow_epollid_set(struct flow_common *f, int epollid);
-void flow_epollid_clear(struct flow_common *f);
int flow_epoll_set(const struct flow_common *f, int command, uint32_t events,
int fd, unsigned int sidei);
void flow_epollid_register(int epollid, int epollfd);
diff --git a/icmp.c b/icmp.c
index eb7f11be5dad..9da5a787d181 100644
--- a/icmp.c
+++ b/icmp.c
@@ -213,7 +213,6 @@ static struct icmp_ping_flow *icmp_ping_new(const struct ctx *c,
if (flow_epoll_set(&pingf->f, EPOLL_CTL_ADD, EPOLLIN, pingf->sock,
TGTSIDE) < 0) {
close(pingf->sock);
- flow_epollid_clear(&pingf->f);
goto cancel;
}
diff --git a/tcp.c b/tcp.c
index d9bca041dea8..a23e4b067b36 100644
--- a/tcp.c
+++ b/tcp.c
@@ -3749,7 +3749,6 @@ static int tcp_flow_repair_connect(const struct ctx *c,
return rc;
}
- flow_epollid_clear(&conn->f);
conn->timer = -1;
conn->listening_sock = -1;
diff --git a/udp_flow.c b/udp_flow.c
index 80b15433f0ac..d824e2c55b2c 100644
--- a/udp_flow.c
+++ b/udp_flow.c
@@ -86,7 +86,6 @@ static int udp_flow_sock(const struct ctx *c,
flow_epollid_set(&uflow->f, EPOLLFD_ID_DEFAULT);
if (flow_epoll_set(&uflow->f, EPOLL_CTL_ADD, EPOLLIN, s, sidei) < 0) {
rc = -errno;
- flow_epollid_clear(&uflow->f);
close(s);
return rc;
}
--
2.52.0
^ permalink raw reply [flat|nested] 10+ messages in thread* Re: [PATCH 3/3] flow: Remove EPOLLFD_ID_INVALID
2026-01-16 15:52 ` [PATCH 3/3] flow: Remove EPOLLFD_ID_INVALID Laurent Vivier
@ 2026-01-19 4:51 ` David Gibson
0 siblings, 0 replies; 10+ messages in thread
From: David Gibson @ 2026-01-19 4:51 UTC (permalink / raw)
To: Laurent Vivier; +Cc: passt-dev
[-- Attachment #1: Type: text/plain, Size: 5986 bytes --]
On Fri, Jan 16, 2026 at 04:52:23PM +0100, Laurent Vivier wrote:
> As all flows are now registered with an epollid at creation, we no
> longer need to test if a flow is in epoll. Remove all related code
> including flow_in_epoll() and flow_epollid_clear().
>
> Signed-off-by: Laurent Vivier <lvivier@redhat.com>
LGTM, except for one possible nit.
> ---
> flow.c | 35 ++---------------------------------
> flow.h | 6 +-----
> icmp.c | 1 -
> tcp.c | 1 -
> udp_flow.c | 1 -
> 5 files changed, 3 insertions(+), 41 deletions(-)
>
> diff --git a/flow.c b/flow.c
> index 532339ce7fe1..0c5503164714 100644
> --- a/flow.c
> +++ b/flow.c
> @@ -127,7 +127,7 @@ static_assert(ARRAY_SIZE(flow_epoll) == FLOW_NUM_TYPES,
> unsigned flow_first_free;
> union flow flowtab[FLOW_MAX];
> static const union flow *flow_new_entry; /* = NULL */
> -static int epoll_id_to_fd[EPOLLFD_ID_MAX];
> +static int epoll_id_to_fd[EPOLLFD_ID_SIZE];
>
> /* Hash table to index it */
> #define FLOW_HASH_LOAD 70 /* % */
> @@ -351,18 +351,6 @@ static void flow_set_state(struct flow_common *f, enum flow_state state)
> flow_log_details_(f, LOG_DEBUG, MAX(state, oldstate));
> }
>
> -/**
> - * flow_in_epoll() - Check if flow is registered with an epoll instance
> - * @f: Flow to check
> - *
> - * Return: true if flow is registered with epoll, false otherwise
> - */
> -/* cppcheck-suppress unusedFunction */
> -bool flow_in_epoll(const struct flow_common *f)
> -{
> - return f->epollid != EPOLLFD_ID_INVALID;
> -}
> -
> /**
> * flow_epollfd() - Get the epoll file descriptor for a flow
> * @f: Flow to query
> @@ -371,13 +359,6 @@ bool flow_in_epoll(const struct flow_common *f)
> */
> int flow_epollfd(const struct flow_common *f)
> {
> - if (f->epollid >= EPOLLFD_ID_MAX) {
> - flow_log_(f, true, LOG_WARNING,
> - "Invalid epollid %i for flow, assuming default",
> - f->epollid);
> - return epoll_id_to_fd[EPOLLFD_ID_DEFAULT];
> - }
> -
> return epoll_id_to_fd[f->epollid];
> }
>
> @@ -388,20 +369,11 @@ int flow_epollfd(const struct flow_common *f)
> */
> void flow_epollid_set(struct flow_common *f, int epollid)
> {
> - ASSERT(epollid < EPOLLFD_ID_MAX);
> + ASSERT(epollid < EPOLLFD_ID_SIZE);
>
> f->epollid = epollid;
> }
>
> -/**
> - * flow_epollid_clear() - Clear the flow epoll id
> - * @f: Flow to update
> - */
> -void flow_epollid_clear(struct flow_common *f)
> -{
> - f->epollid = EPOLLFD_ID_INVALID;
> -}
> -
> /**
> * flow_epoll_set() - Add or modify epoll registration for a flow socket
> * @f: Flow to register socket for
> @@ -435,8 +407,6 @@ int flow_epoll_set(const struct flow_common *f, int command, uint32_t events,
> */
> void flow_epollid_register(int epollid, int epollfd)
> {
> - ASSERT(epollid < EPOLLFD_ID_MAX);
> -
As with the other ASSERT()s this one could be updated to
EPOLLFD_ID_SIZE, rather than removed.
> epoll_id_to_fd[epollid] = epollfd;
> }
>
> @@ -643,7 +613,6 @@ union flow *flow_alloc(void)
>
> flow_new_entry = flow;
> memset(flow, 0, sizeof(*flow));
> - flow_epollid_clear(&flow->f);
> flow_set_state(&flow->f, FLOW_STATE_NEW);
>
> return flow;
> diff --git a/flow.h b/flow.h
> index a74e13507957..d636358df422 100644
> --- a/flow.h
> +++ b/flow.h
> @@ -178,7 +178,7 @@ int flowside_connect(const struct ctx *c, int s,
> * @pif[]: Interface for each side of the flow
> * @side[]: Information for each side of the flow
> * @tap_omac: MAC address of remote endpoint as seen from the guest
> - * @epollid: epollfd identifier, or EPOLLFD_ID_INVALID
> + * @epollid: epollfd identifier
> */
> struct flow_common {
> #ifdef __GNUC__
> @@ -203,8 +203,6 @@ struct flow_common {
>
> #define EPOLLFD_ID_DEFAULT 0
> #define EPOLLFD_ID_SIZE (1 << EPOLLFD_ID_BITS)
> -#define EPOLLFD_ID_MAX (EPOLLFD_ID_SIZE - 1)
> -#define EPOLLFD_ID_INVALID EPOLLFD_ID_MAX
>
> #define FLOW_INDEX_BITS 17 /* 128k - 1 */
> #define FLOW_MAX MAX_FROM_BITS(FLOW_INDEX_BITS)
> @@ -261,10 +259,8 @@ flow_sidx_t flow_lookup_sa(const struct ctx *c, uint8_t proto, uint8_t pif,
> union flow;
>
> void flow_init(void);
> -bool flow_in_epoll(const struct flow_common *f);
> int flow_epollfd(const struct flow_common *f);
> void flow_epollid_set(struct flow_common *f, int epollid);
> -void flow_epollid_clear(struct flow_common *f);
> int flow_epoll_set(const struct flow_common *f, int command, uint32_t events,
> int fd, unsigned int sidei);
> void flow_epollid_register(int epollid, int epollfd);
> diff --git a/icmp.c b/icmp.c
> index eb7f11be5dad..9da5a787d181 100644
> --- a/icmp.c
> +++ b/icmp.c
> @@ -213,7 +213,6 @@ static struct icmp_ping_flow *icmp_ping_new(const struct ctx *c,
> if (flow_epoll_set(&pingf->f, EPOLL_CTL_ADD, EPOLLIN, pingf->sock,
> TGTSIDE) < 0) {
> close(pingf->sock);
> - flow_epollid_clear(&pingf->f);
> goto cancel;
> }
>
> diff --git a/tcp.c b/tcp.c
> index d9bca041dea8..a23e4b067b36 100644
> --- a/tcp.c
> +++ b/tcp.c
> @@ -3749,7 +3749,6 @@ static int tcp_flow_repair_connect(const struct ctx *c,
> return rc;
> }
>
> - flow_epollid_clear(&conn->f);
> conn->timer = -1;
> conn->listening_sock = -1;
>
> diff --git a/udp_flow.c b/udp_flow.c
> index 80b15433f0ac..d824e2c55b2c 100644
> --- a/udp_flow.c
> +++ b/udp_flow.c
> @@ -86,7 +86,6 @@ static int udp_flow_sock(const struct ctx *c,
> flow_epollid_set(&uflow->f, EPOLLFD_ID_DEFAULT);
> if (flow_epoll_set(&uflow->f, EPOLL_CTL_ADD, EPOLLIN, s, sidei) < 0) {
> rc = -errno;
> - flow_epollid_clear(&uflow->f);
> close(s);
> return rc;
> }
> --
> 2.52.0
>
--
David Gibson (he or they) | I'll have my music baroque, and my code
david AT gibson.dropbear.id.au | minimalist, thank you, not the other way
| around.
http://www.ozlabs.org/~dgibson
[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]
^ permalink raw reply [flat|nested] 10+ messages in thread