On Fri, Dec 19, 2025 at 05:45:18PM +0100, Laurent Vivier wrote: > Currently, callers of flow_epoll_set() pass the file descriptor as an > explicit parameter. However, the function already has access to the flow > structure and knows the epoll type, so it can retrieve the appropriate fd > itself. > > Remove the fd parameter and have flow_epoll_set() extract the file > descriptor directly from the flow based on the type: > - EPOLL_TYPE_TCP_SPLICE: conn->s[sidei] > - EPOLL_TYPE_TCP: conn->sock > - EPOLL_TYPE_TCP_TIMER: conn->timer > - EPOLL_TYPE_PING: pingf->sock > - EPOLL_TYPE_UDP: uflow->s[sidei] As with the previous patch, this seems like a layering violation to me, and I'm not seeing a strong enough case for this change to outweight that. > For TCP timers, the previous logic determined ADD vs MOD by checking > whether conn->timer was -1. Since the timer fd must now be assigned to the > flow before calling flow_epoll_set(), this check would no longer work. > Instead, introduce FLOW_EPOLL_TIMER_ADD and FLOW_EPOLL_TIMER_MOD constants > and have callers specify the operation explicitly via the sidei parameter. Blech. > This requires callers to assign the socket or timer fd to the flow > structure before calling flow_epoll_set(), with appropriate cleanup > (resetting to -1) if the epoll registration fails. > > Signed-off-by: Laurent Vivier > --- > flow.c | 26 +++++++++++++++++++------- > flow.h | 4 +++- > icmp.c | 3 +-- > tcp.c | 12 ++++++------ > tcp_splice.c | 4 ++-- > udp_flow.c | 6 ++++-- > 6 files changed, 35 insertions(+), 20 deletions(-) > > diff --git a/flow.c b/flow.c > index 6b9ccca6ef50..7d6f9f2bf1a9 100644 > --- a/flow.c > +++ b/flow.c > @@ -451,13 +451,14 @@ static uint32_t tcp_conn_epoll_events(uint8_t events, uint8_t conn_flags) > * flow_epoll_set() - Add or modify epoll registration for a flow socket > * @type: epoll type > * @f: Flow to register socket for > - * @fd: File descriptor to register > - * @sidei: Side index of the flow > + * @sidei: Side index of the flow (for most types), or for EPOLL_TYPE_TCP_TIMER: > + * FLOW_EPOLL_TIMER_ADD to add the timer to epoll, > + * FLOW_EPOLL_TIMER_MOD to modify an existing registration > * > * Return: 0 on success, -1 on error (from epoll_ctl()) > */ > int flow_epoll_set(enum epoll_type type, const struct flow_common *f, > - int fd, unsigned int sidei) > + unsigned int sidei) > { > struct epoll_event ev; > union epoll_ref ref; > @@ -465,13 +466,13 @@ int flow_epoll_set(enum epoll_type type, const struct flow_common *f, > int epollfd; > int m; > > - ref.fd = fd; > ref.type = type; > > switch (type) { > case EPOLL_TYPE_TCP_SPLICE: { > const struct tcp_splice_conn *conn = &((union flow *)f)->tcp_splice; > > + ref.fd = conn->s[sidei]; > ref.flowside = flow_sidx(f, sidei); > if (flow_in_epoll(f)) { > m = EPOLL_CTL_MOD; > @@ -487,6 +488,7 @@ int flow_epoll_set(enum epoll_type type, const struct flow_common *f, > case EPOLL_TYPE_TCP: { > const struct tcp_tap_conn *conn = &((union flow *)f)->tcp; > > + ref.fd = conn->sock; > ref.flowside = flow_sidx(f, sidei); > if (flow_in_epoll(f)) { > m = EPOLL_CTL_MOD; > @@ -502,24 +504,34 @@ int flow_epoll_set(enum epoll_type type, const struct flow_common *f, > case EPOLL_TYPE_TCP_TIMER: { > const struct tcp_tap_conn *conn = &((union flow *)f)->tcp; > > + ref.fd = conn->timer; > ref.flow = flow_idx(f); > - m = conn->timer == -1 ? EPOLL_CTL_ADD : EPOLL_CTL_MOD; > + if (sidei == FLOW_EPOLL_TIMER_ADD) > + m = EPOLL_CTL_ADD; > + else > + m = EPOLL_CTL_MOD; > epollfd = flow_epollfd(f); > events = EPOLLIN | EPOLLET; > break; > } > - case EPOLL_TYPE_PING: > + case EPOLL_TYPE_PING: { > + const struct icmp_ping_flow *pingf = &((union flow *)f)->ping; > + > + ref.fd = pingf->sock; > ref.flowside = flow_sidx(f, sidei); > m = EPOLL_CTL_ADD; > epollfd = flow_epollfd(f); > events = EPOLLIN; > break; > + } > case EPOLL_TYPE_UDP: { > + const struct udp_flow *uflow = &((union flow *)f)->udp; > union { > flow_sidx_t sidx; > uint32_t data; > } fref = { .sidx = flow_sidx(f, sidei) }; > > + ref.fd = uflow->s[sidei]; > ref.data = fref.data; > m = EPOLL_CTL_ADD; > epollfd = flow_epollfd(f); > @@ -533,7 +545,7 @@ int flow_epoll_set(enum epoll_type type, const struct flow_common *f, > ev.events = events; > ev.data.u64 = ref.u64; > > - return epoll_ctl(epollfd, m, fd, &ev); > + return epoll_ctl(epollfd, m, ref.fd, &ev); > } > > /** > diff --git a/flow.h b/flow.h > index ed1e16139a4d..3a8224dba3b0 100644 > --- a/flow.h > +++ b/flow.h > @@ -266,7 +266,9 @@ int flow_epollfd(const struct flow_common *f); > void flow_epollid_set(struct flow_common *f, int epollid); > void flow_epollid_clear(struct flow_common *f); > int flow_epoll_set(enum epoll_type type, const struct flow_common *f, > - int fd, unsigned int sidei); > + unsigned int sidei); > +#define FLOW_EPOLL_TIMER_MOD 0 > +#define FLOW_EPOLL_TIMER_ADD 1 > void flow_epollid_register(int epollid, int epollfd); > void flow_defer_handler(const struct ctx *c, const struct timespec *now); > int flow_migrate_source_early(struct ctx *c, const struct migrate_stage *stage, > diff --git a/icmp.c b/icmp.c > index 5487a904117d..2f2fb56ed726 100644 > --- a/icmp.c > +++ b/icmp.c > @@ -210,8 +210,7 @@ static struct icmp_ping_flow *icmp_ping_new(const struct ctx *c, > goto cancel; > > flow_epollid_set(&pingf->f, EPOLLFD_ID_DEFAULT); > - if (flow_epoll_set(EPOLL_TYPE_PING, &pingf->f, pingf->sock, > - TGTSIDE) < 0) { > + if (flow_epoll_set(EPOLL_TYPE_PING, &pingf->f, TGTSIDE) < 0) { > close(pingf->sock); > goto cancel; > } > diff --git a/tcp.c b/tcp.c > index 2e8a7d516273..730da457081c 100644 > --- a/tcp.c > +++ b/tcp.c > @@ -507,15 +507,14 @@ static int tcp_epoll_ctl(const struct ctx *c, struct tcp_tap_conn *conn) > return 0; > } > > - if (flow_epoll_set(EPOLL_TYPE_TCP, &conn->f, conn->sock, > - !TAPSIDE(conn))) > + if (flow_epoll_set(EPOLL_TYPE_TCP, &conn->f, !TAPSIDE(conn))) > return -errno; > > flow_epollid_set(&conn->f, EPOLLFD_ID_DEFAULT); > > if (conn->timer != -1) { > if (flow_epoll_set(EPOLL_TYPE_TCP_TIMER, &conn->f, > - conn->timer, 0)) > + FLOW_EPOLL_TIMER_MOD)) > return -errno; > } > > @@ -546,13 +545,14 @@ static void tcp_timer_ctl(const struct ctx *c, struct tcp_tap_conn *conn) > return; > } > > - if (flow_epoll_set(EPOLL_TYPE_TCP_TIMER, &conn->f, fd, 0)) { > + conn->timer = fd; > + if (flow_epoll_set(EPOLL_TYPE_TCP_TIMER, &conn->f, > + FLOW_EPOLL_TIMER_ADD)) { > flow_dbg_perror(conn, "failed to add timer"); > close(fd); > + conn->timer = -1; > return; > } > - > - conn->timer = fd; > } > > if (conn->flags & ACK_TO_TAP_DUE) { > diff --git a/tcp_splice.c b/tcp_splice.c > index 1eeb678d534b..964a559782a3 100644 > --- a/tcp_splice.c > +++ b/tcp_splice.c > @@ -117,8 +117,8 @@ static struct tcp_splice_conn *conn_at_sidx(flow_sidx_t sidx) > */ > static int tcp_splice_epoll_ctl(struct tcp_splice_conn *conn) > { > - if (flow_epoll_set(EPOLL_TYPE_TCP_SPLICE, &conn->f, conn->s[0], 0) || > - flow_epoll_set(EPOLL_TYPE_TCP_SPLICE, &conn->f, conn->s[1], 1)) { > + if (flow_epoll_set(EPOLL_TYPE_TCP_SPLICE, &conn->f, 0) || > + flow_epoll_set(EPOLL_TYPE_TCP_SPLICE, &conn->f, 1)) { > int ret = -errno; > flow_perror(conn, "ERROR on epoll_ctl()"); > return ret; > diff --git a/udp_flow.c b/udp_flow.c > index afacfb356261..9a67ad701c34 100644 > --- a/udp_flow.c > +++ b/udp_flow.c > @@ -83,10 +83,12 @@ static int udp_flow_sock(const struct ctx *c, > return s; > } > > + uflow->s[sidei] = s; > flow_epollid_set(&uflow->f, EPOLLFD_ID_DEFAULT); > - rc = flow_epoll_set(EPOLL_TYPE_UDP, &uflow->f, s, sidei); > + rc = flow_epoll_set(EPOLL_TYPE_UDP, &uflow->f, sidei); > if (rc < 0) { > close(s); > + uflow->s[sidei] = -1; > return rc; > } > > @@ -95,11 +97,11 @@ static int udp_flow_sock(const struct ctx *c, > > epoll_del(flow_epollfd(&uflow->f), s); > close(s); > + uflow->s[sidei] = -1; > > flow_dbg_perror(uflow, "Couldn't connect flow socket"); > return rc; > } > - uflow->s[sidei] = s; > > /* It's possible, if unlikely, that we could receive some packets in > * between the bind() and connect() which may or may not be for this > -- > 2.51.1 > -- David Gibson (he or they) | I'll have my music baroque, and my code david AT gibson.dropbear.id.au | minimalist, thank you, not the other way | around. http://www.ozlabs.org/~dgibson