On Fri, Dec 19, 2025 at 05:45:17PM +0100, Laurent Vivier wrote: > Rather than having each caller compute the appropriate epoll events and > pass them to flow_epoll_set(), move the event computation into > flow_epoll_set() itself. The function already switches on epoll_type to > determine other parameters, so computing events there is a natural fit. I'm not sure about this. Yes, the switch already exists. But, adding more protocol specific logic - and more access to the protocol specific flow entrie from flow_epoll_set() seems like a layering violation. I feel like it belongs better with the rest of the protocol specific logic. Maybe there's a sufficient advantage to putting it in flow_epollset() to outweigh that, but it's not obvious to me from this commit message. > Move tcp_conn_epoll_events() from tcp.c and tcp_splice_conn_epoll_events() > from tcp_splice.c into flow.c where they can be called internally. For > simpler cases (PING, UDP, TCP_TIMER), the events are constant values that > are now set directly in the switch cases. > > This centralizes all epoll event logic in one place, making it easier to > understand and maintain the relationship between flow types and their > epoll configurations. Yeah... I'm not convinced about this. > > Signed-off-by: Laurent Vivier > --- > flow.c | 86 +++++++++++++++++++++++++++++++++++++++++++++++++--- > flow.h | 2 +- > icmp.c | 2 +- > tcp.c | 43 ++------------------------ > tcp_splice.c | 35 ++------------------- > udp_flow.c | 2 +- > 6 files changed, 90 insertions(+), 80 deletions(-) > > diff --git a/flow.c b/flow.c > index a3f75015a078..6b9ccca6ef50 100644 > --- a/flow.c > +++ b/flow.c > @@ -391,21 +391,77 @@ void flow_epollid_clear(struct flow_common *f) > f->epollid = EPOLLFD_ID_INVALID; > } > > +/** > + * tcp_splice_conn_epoll_events() - epoll events masks for given state > + * @events: Connection event flags > + * @ev: Events to fill in, 0 is accepted socket, 1 is connecting socket > + */ > +static uint32_t tcp_splice_conn_epoll_events(uint16_t events, unsigned sidei) > +{ > + uint32_t e = 0; > + > + if (events & SPLICE_ESTABLISHED) { > + if (!(events & FIN_SENT(!sidei))) > + e = EPOLLIN | EPOLLRDHUP; > + } else if (sidei == 1 && events & SPLICE_CONNECT) { > + e = EPOLLOUT; > + } > + > + if (events & OUT_WAIT(sidei)) > + e |= EPOLLOUT; > + if (events & OUT_WAIT(!sidei)) > + e &= ~EPOLLIN; > + > + return e; > +} > + > +/** > + * tcp_conn_epoll_events() - epoll events mask for given connection state > + * @events: Current connection events > + * @conn_flags: Connection flags > + * > + * Return: epoll events mask corresponding to implied connection state > + */ > +static uint32_t tcp_conn_epoll_events(uint8_t events, uint8_t conn_flags) > +{ > + if (!events) > + return 0; > + > + if (events & ESTABLISHED) { > + if (events & TAP_FIN_SENT) > + return EPOLLET; > + > + if (conn_flags & STALLED) { > + if (conn_flags & ACK_FROM_TAP_BLOCKS) > + return EPOLLRDHUP | EPOLLET; > + > + return EPOLLIN | EPOLLRDHUP | EPOLLET; > + } > + > + return EPOLLIN | EPOLLRDHUP; > + } > + > + if (events == TAP_SYN_RCVD) > + return EPOLLOUT | EPOLLET | EPOLLRDHUP; > + > + return EPOLLET | EPOLLRDHUP; > +} > + > /** > * flow_epoll_set() - Add or modify epoll registration for a flow socket > * @type: epoll type > * @f: Flow to register socket for > - * @events: epoll events to watch for > * @fd: File descriptor to register > * @sidei: Side index of the flow > * > * Return: 0 on success, -1 on error (from epoll_ctl()) > */ > int flow_epoll_set(enum epoll_type type, const struct flow_common *f, > - uint32_t events, int fd, unsigned int sidei) > + int fd, unsigned int sidei) > { > struct epoll_event ev; > union epoll_ref ref; > + uint32_t events; > int epollfd; > int m; > > @@ -413,8 +469,24 @@ int flow_epoll_set(enum epoll_type type, const struct flow_common *f, > ref.type = type; > > switch (type) { > - case EPOLL_TYPE_TCP_SPLICE: > - case EPOLL_TYPE_TCP: > + case EPOLL_TYPE_TCP_SPLICE: { > + const struct tcp_splice_conn *conn = &((union flow *)f)->tcp_splice; > + > + ref.flowside = flow_sidx(f, sidei); > + if (flow_in_epoll(f)) { > + m = EPOLL_CTL_MOD; > + epollfd = flow_epollfd(f); > + } else { > + m = EPOLL_CTL_ADD; > + epollfd = epoll_id_to_fd[EPOLLFD_ID_DEFAULT]; > + } > + > + events = tcp_splice_conn_epoll_events(conn->events, sidei); > + break; > + } > + case EPOLL_TYPE_TCP: { > + const struct tcp_tap_conn *conn = &((union flow *)f)->tcp; > + > ref.flowside = flow_sidx(f, sidei); > if (flow_in_epoll(f)) { > m = EPOLL_CTL_MOD; > @@ -423,19 +495,24 @@ int flow_epoll_set(enum epoll_type type, const struct flow_common *f, > m = EPOLL_CTL_ADD; > epollfd = epoll_id_to_fd[EPOLLFD_ID_DEFAULT]; > } > + > + events = tcp_conn_epoll_events(conn->events, conn->flags); > break; > + } > case EPOLL_TYPE_TCP_TIMER: { > const struct tcp_tap_conn *conn = &((union flow *)f)->tcp; > > ref.flow = flow_idx(f); > m = conn->timer == -1 ? EPOLL_CTL_ADD : EPOLL_CTL_MOD; > epollfd = flow_epollfd(f); > + events = EPOLLIN | EPOLLET; > break; > } > case EPOLL_TYPE_PING: > ref.flowside = flow_sidx(f, sidei); > m = EPOLL_CTL_ADD; > epollfd = flow_epollfd(f); > + events = EPOLLIN; > break; > case EPOLL_TYPE_UDP: { > union { > @@ -446,6 +523,7 @@ int flow_epoll_set(enum epoll_type type, const struct flow_common *f, > ref.data = fref.data; > m = EPOLL_CTL_ADD; > epollfd = flow_epollfd(f); > + events = EPOLLIN; > break; > } > default: > diff --git a/flow.h b/flow.h > index 9740ede8963e..ed1e16139a4d 100644 > --- a/flow.h > +++ b/flow.h > @@ -266,7 +266,7 @@ int flow_epollfd(const struct flow_common *f); > void flow_epollid_set(struct flow_common *f, int epollid); > void flow_epollid_clear(struct flow_common *f); > int flow_epoll_set(enum epoll_type type, const struct flow_common *f, > - uint32_t events, int fd, unsigned int sidei); > + int fd, unsigned int sidei); > void flow_epollid_register(int epollid, int epollfd); > void flow_defer_handler(const struct ctx *c, const struct timespec *now); > int flow_migrate_source_early(struct ctx *c, const struct migrate_stage *stage, > diff --git a/icmp.c b/icmp.c > index f0b6fcff1776..5487a904117d 100644 > --- a/icmp.c > +++ b/icmp.c > @@ -210,7 +210,7 @@ static struct icmp_ping_flow *icmp_ping_new(const struct ctx *c, > goto cancel; > > flow_epollid_set(&pingf->f, EPOLLFD_ID_DEFAULT); > - if (flow_epoll_set(EPOLL_TYPE_PING, &pingf->f, EPOLLIN, pingf->sock, > + if (flow_epoll_set(EPOLL_TYPE_PING, &pingf->f, pingf->sock, > TGTSIDE) < 0) { > close(pingf->sock); > goto cancel; > diff --git a/tcp.c b/tcp.c > index ec4f6d2ecc04..2e8a7d516273 100644 > --- a/tcp.c > +++ b/tcp.c > @@ -489,38 +489,6 @@ int tcp_set_peek_offset(const struct tcp_tap_conn *conn, int offset) > return 0; > } > > -/** > - * tcp_conn_epoll_events() - epoll events mask for given connection state > - * @events: Current connection events > - * @conn_flags: Connection flags > - * > - * Return: epoll events mask corresponding to implied connection state > - */ > -static uint32_t tcp_conn_epoll_events(uint8_t events, uint8_t conn_flags) > -{ > - if (!events) > - return 0; > - > - if (events & ESTABLISHED) { > - if (events & TAP_FIN_SENT) > - return EPOLLET; > - > - if (conn_flags & STALLED) { > - if (conn_flags & ACK_FROM_TAP_BLOCKS) > - return EPOLLRDHUP | EPOLLET; > - > - return EPOLLIN | EPOLLRDHUP | EPOLLET; > - } > - > - return EPOLLIN | EPOLLRDHUP; > - } > - > - if (events == TAP_SYN_RCVD) > - return EPOLLOUT | EPOLLET | EPOLLRDHUP; > - > - return EPOLLET | EPOLLRDHUP; > -} > - > /** > * tcp_epoll_ctl() - Add/modify/delete epoll state from connection events > * @c: Execution context > @@ -530,8 +498,6 @@ static uint32_t tcp_conn_epoll_events(uint8_t events, uint8_t conn_flags) > */ > static int tcp_epoll_ctl(const struct ctx *c, struct tcp_tap_conn *conn) > { > - uint32_t events; > - > if (conn->events == CLOSED) { > int epollfd = flow_in_epoll(&conn->f) ? flow_epollfd(&conn->f) : c->epollfd; > if (flow_in_epoll(&conn->f)) > @@ -541,9 +507,7 @@ static int tcp_epoll_ctl(const struct ctx *c, struct tcp_tap_conn *conn) > return 0; > } > > - events = tcp_conn_epoll_events(conn->events, conn->flags); > - > - if (flow_epoll_set(EPOLL_TYPE_TCP, &conn->f, events, conn->sock, > + if (flow_epoll_set(EPOLL_TYPE_TCP, &conn->f, conn->sock, > !TAPSIDE(conn))) > return -errno; > > @@ -551,7 +515,7 @@ static int tcp_epoll_ctl(const struct ctx *c, struct tcp_tap_conn *conn) > > if (conn->timer != -1) { > if (flow_epoll_set(EPOLL_TYPE_TCP_TIMER, &conn->f, > - EPOLLIN | EPOLLET, conn->timer, 0)) > + conn->timer, 0)) > return -errno; > } > > @@ -582,8 +546,7 @@ static void tcp_timer_ctl(const struct ctx *c, struct tcp_tap_conn *conn) > return; > } > > - if (flow_epoll_set(EPOLL_TYPE_TCP_TIMER, &conn->f, > - EPOLLIN | EPOLLET, fd, 0)) { > + if (flow_epoll_set(EPOLL_TYPE_TCP_TIMER, &conn->f, fd, 0)) { > flow_dbg_perror(conn, "failed to add timer"); > close(fd); > return; > diff --git a/tcp_splice.c b/tcp_splice.c > index ccf627f4427d..1eeb678d534b 100644 > --- a/tcp_splice.c > +++ b/tcp_splice.c > @@ -109,30 +109,6 @@ static struct tcp_splice_conn *conn_at_sidx(flow_sidx_t sidx) > return &flow->tcp_splice; > } > > -/** > - * tcp_splice_conn_epoll_events() - epoll events masks for given state > - * @events: Connection event flags > - * @ev: Events to fill in, 0 is accepted socket, 1 is connecting socket > - */ > -static uint32_t tcp_splice_conn_epoll_events(uint16_t events, unsigned sidei) > -{ > - uint32_t e = 0; > - > - if (events & SPLICE_ESTABLISHED) { > - if (!(events & FIN_SENT(!sidei))) > - e = EPOLLIN | EPOLLRDHUP; > - } else if (sidei == 1 && events & SPLICE_CONNECT) { > - e = EPOLLOUT; > - } > - > - if (events & OUT_WAIT(sidei)) > - e |= EPOLLOUT; > - if (events & OUT_WAIT(!sidei)) > - e &= ~EPOLLIN; > - > - return e; > -} > - > /** > * tcp_splice_epoll_ctl() - Add/modify/delete epoll state from connection events > * @conn: Connection pointer > @@ -141,15 +117,8 @@ static uint32_t tcp_splice_conn_epoll_events(uint16_t events, unsigned sidei) > */ > static int tcp_splice_epoll_ctl(struct tcp_splice_conn *conn) > { > - uint32_t events[2]; > - > - events[0] = tcp_splice_conn_epoll_events(conn->events, 0); > - events[1] = tcp_splice_conn_epoll_events(conn->events, 1); > - > - if (flow_epoll_set(EPOLL_TYPE_TCP_SPLICE, &conn->f, events[0], > - conn->s[0], 0) || > - flow_epoll_set(EPOLL_TYPE_TCP_SPLICE, &conn->f, events[1], > - conn->s[1], 1)) { > + if (flow_epoll_set(EPOLL_TYPE_TCP_SPLICE, &conn->f, conn->s[0], 0) || > + flow_epoll_set(EPOLL_TYPE_TCP_SPLICE, &conn->f, conn->s[1], 1)) { > int ret = -errno; > flow_perror(conn, "ERROR on epoll_ctl()"); > return ret; > diff --git a/udp_flow.c b/udp_flow.c > index 07ff589670c1..afacfb356261 100644 > --- a/udp_flow.c > +++ b/udp_flow.c > @@ -84,7 +84,7 @@ static int udp_flow_sock(const struct ctx *c, > } > > flow_epollid_set(&uflow->f, EPOLLFD_ID_DEFAULT); > - rc = flow_epoll_set(EPOLL_TYPE_UDP, &uflow->f, EPOLLIN, s, sidei); > + rc = flow_epoll_set(EPOLL_TYPE_UDP, &uflow->f, s, sidei); > if (rc < 0) { > close(s); > return rc; > -- > 2.51.1 > -- David Gibson (he or they) | I'll have my music baroque, and my code david AT gibson.dropbear.id.au | minimalist, thank you, not the other way | around. http://www.ozlabs.org/~dgibson