On Tue, Oct 21, 2025 at 11:01:11PM +0200, Laurent Vivier wrote: > Centralize epoll_add() and epoll_del() helper functions into new > epoll_ctl.c/h files. > > This also moves the union epoll_ref definition from passt.h to > epoll_ctl.h where it's more logically placed. > > The new epoll_add() helper simplifies adding file descriptors to epoll > by taking an epoll_ref and events, handling error reporting > consistently across all call sites. > > Signed-off-by: Laurent Vivier Reviewed-by: David Gibson > --- > Makefile | 22 +++++++++++----------- > epoll_ctl.c | 45 +++++++++++++++++++++++++++++++++++++++++++++ > epoll_ctl.h | 51 +++++++++++++++++++++++++++++++++++++++++++++++++++ > icmp.c | 4 +--- > passt.c | 2 +- > passt.h | 34 ---------------------------------- > pasta.c | 7 +++---- > repair.c | 18 +++++++----------- > tap.c | 13 ++++--------- > tcp.c | 2 +- > tcp_splice.c | 2 +- > udp.c | 2 +- > udp_flow.c | 1 + > util.c | 22 +++------------------- > util.h | 4 +++- > vhost_user.c | 8 ++------ > vu_common.c | 2 +- > 17 files changed, 136 insertions(+), 103 deletions(-) > create mode 100644 epoll_ctl.c > create mode 100644 epoll_ctl.h > > diff --git a/Makefile b/Makefile > index 3328f8324140..91e037b8fd3c 100644 > --- a/Makefile > +++ b/Makefile > @@ -37,23 +37,23 @@ FLAGS += -DPAGE_SIZE=$(shell getconf PAGE_SIZE) > FLAGS += -DVERSION=\"$(VERSION)\" > FLAGS += -DDUAL_STACK_SOCKETS=$(DUAL_STACK_SOCKETS) > > -PASST_SRCS = arch.c arp.c checksum.c conf.c dhcp.c dhcpv6.c flow.c fwd.c \ > - icmp.c igmp.c inany.c iov.c ip.c isolation.c lineread.c log.c mld.c \ > - ndp.c netlink.c migrate.c packet.c passt.c pasta.c pcap.c pif.c \ > - repair.c tap.c tcp.c tcp_buf.c tcp_splice.c tcp_vu.c udp.c udp_flow.c \ > - udp_vu.c util.c vhost_user.c virtio.c vu_common.c > +PASST_SRCS = arch.c arp.c checksum.c conf.c dhcp.c dhcpv6.c epoll_ctl.c \ > + flow.c fwd.c icmp.c igmp.c inany.c iov.c ip.c isolation.c lineread.c \ > + log.c mld.c ndp.c netlink.c migrate.c packet.c passt.c pasta.c pcap.c \ > + pif.c repair.c tap.c tcp.c tcp_buf.c tcp_splice.c tcp_vu.c udp.c \ > + udp_flow.c udp_vu.c util.c vhost_user.c virtio.c vu_common.c > QRAP_SRCS = qrap.c > PASST_REPAIR_SRCS = passt-repair.c > SRCS = $(PASST_SRCS) $(QRAP_SRCS) $(PASST_REPAIR_SRCS) > > MANPAGES = passt.1 pasta.1 qrap.1 passt-repair.1 > > -PASST_HEADERS = arch.h arp.h checksum.h conf.h dhcp.h dhcpv6.h flow.h fwd.h \ > - flow_table.h icmp.h icmp_flow.h inany.h iov.h ip.h isolation.h \ > - lineread.h log.h migrate.h ndp.h netlink.h packet.h passt.h pasta.h \ > - pcap.h pif.h repair.h siphash.h tap.h tcp.h tcp_buf.h tcp_conn.h \ > - tcp_internal.h tcp_splice.h tcp_vu.h udp.h udp_flow.h udp_internal.h \ > - udp_vu.h util.h vhost_user.h virtio.h vu_common.h > +PASST_HEADERS = arch.h arp.h checksum.h conf.h dhcp.h dhcpv6.h epoll_ctl.h \ > + flow.h fwd.h flow_table.h icmp.h icmp_flow.h inany.h iov.h ip.h \ > + isolation.h lineread.h log.h migrate.h ndp.h netlink.h packet.h \ > + passt.h pasta.h pcap.h pif.h repair.h siphash.h tap.h tcp.h tcp_buf.h \ > + tcp_conn.h tcp_internal.h tcp_splice.h tcp_vu.h udp.h udp_flow.h \ > + udp_internal.h udp_vu.h util.h vhost_user.h virtio.h vu_common.h > HEADERS = $(PASST_HEADERS) seccomp.h > > C := \#include \nint main(){int a=getrandom(0, 0, 0);} > diff --git a/epoll_ctl.c b/epoll_ctl.c > new file mode 100644 > index 000000000000..728a2afe1f6b > --- /dev/null > +++ b/epoll_ctl.c > @@ -0,0 +1,45 @@ > +// SPDX-License-Identifier: GPL-2.0-or-later > +/* epoll_ctl.c - epoll manipulation helpers > + * > + * Copyright Red Hat > + * Author: Laurent Vivier > + */ > + > +#include > + > +#include "epoll_ctl.h" > + > +/** > + * epoll_add() - Add a file descriptor to an epollfd > + * @epollfd: epoll file descriptor to add to > + * @events: epoll events > + * @ref: epoll reference for the file descriptor (includes fd and metadata) > + * > + * Return: 0 on success, negative errno on failure > + */ > +int epoll_add(int epollfd, uint32_t events, union epoll_ref ref) > +{ > + struct epoll_event ev; > + int ret; > + > + ev.events = events; > + ev.data.u64 = ref.u64; > + > + ret = epoll_ctl(epollfd, EPOLL_CTL_ADD, ref.fd, &ev); > + if (ret == -1) { > + ret = -errno; > + err("Failed to add fd to epoll: %s", strerror_(-ret)); > + } > + > + return ret; > +} > + > +/** > + * epoll_del() - Remove a file descriptor from an epollfd > + * @epollfd: epoll file descriptor to remove from > + * @fd: File descriptor to remove > + */ > +void epoll_del(int epollfd, int fd) > +{ > + epoll_ctl(epollfd, EPOLL_CTL_DEL, fd, NULL); > +} > diff --git a/epoll_ctl.h b/epoll_ctl.h > new file mode 100644 > index 000000000000..2d7e7123ae9d > --- /dev/null > +++ b/epoll_ctl.h > @@ -0,0 +1,51 @@ > +/* SPDX-License-Identifier: GPL-2.0-or-later > + * Copyright Red Hat > + * Author: Laurent Vivier > + */ > + > +#ifndef EPOLL_CTL_H > +#define EPOLL_CTL_H > + > +#include > + > +#include "util.h" > +#include "passt.h" > +#include "epoll_type.h" > +#include "flow.h" > +#include "tcp.h" > +#include "udp.h" > + > +/** > + * union epoll_ref - Breakdown of reference for epoll fd bookkeeping > + * @type: Type of fd (tells us what to do with events) > + * @fd: File descriptor number (implies < 2^24 total descriptors) > + * @flow: Index of the flow this fd is linked to > + * @tcp_listen: TCP-specific reference part for listening sockets > + * @udp: UDP-specific reference part > + * @data: Data handled by protocol handlers > + * @nsdir_fd: netns dirfd for fallback timer checking if namespace is gone > + * @queue: vhost-user queue index for this fd > + * @u64: Opaque reference for epoll_ctl() and epoll_wait() > + */ > +union epoll_ref { > + struct { > + enum epoll_type type:8; > + int32_t fd:FD_REF_BITS; > + union { > + uint32_t flow; > + flow_sidx_t flowside; > + union tcp_listen_epoll_ref tcp_listen; > + union udp_listen_epoll_ref udp; > + uint32_t data; > + int nsdir_fd; > + int queue; > + }; > + }; > + uint64_t u64; > +}; > +static_assert(sizeof(union epoll_ref) <= sizeof(union epoll_data), > + "epoll_ref must have same size as epoll_data"); > + > +int epoll_add(int epollfd, uint32_t events, union epoll_ref ref); > +void epoll_del(int epollfd, int fd); > +#endif /* EPOLL_CTL_H */ > diff --git a/icmp.c b/icmp.c > index bd3108a21675..c26561da80bf 100644 > --- a/icmp.c > +++ b/icmp.c > @@ -15,7 +15,6 @@ > #include > #include > #include > -#include > #include > #include > #include > @@ -23,10 +22,8 @@ > #include > #include > #include > -#include > #include > #include > -#include > #include > > #include > @@ -41,6 +38,7 @@ > #include "inany.h" > #include "icmp.h" > #include "flow_table.h" > +#include "epoll_ctl.h" > > #define ICMP_ECHO_TIMEOUT 60 /* s, timeout for ICMP socket activity */ > #define ICMP_NUM_IDS (1U << 16) > diff --git a/passt.c b/passt.c > index bdb7b6935f0c..af928111786b 100644 > --- a/passt.c > +++ b/passt.c > @@ -19,7 +19,6 @@ > * created in a separate network namespace). > */ > > -#include > #include > #include > #include > @@ -53,6 +52,7 @@ > #include "vu_common.h" > #include "migrate.h" > #include "repair.h" > +#include "epoll_ctl.h" > > #define NUM_EPOLL_EVENTS 8 > > diff --git a/passt.h b/passt.h > index 0075eb4b3b16..befe56bb167b 100644 > --- a/passt.h > +++ b/passt.h > @@ -35,40 +35,6 @@ union epoll_ref; > #define MAC_OUR_LAA \ > ((uint8_t [ETH_ALEN]){0x9a, 0x55, 0x9a, 0x55, 0x9a, 0x55}) > > -/** > - * union epoll_ref - Breakdown of reference for epoll fd bookkeeping > - * @type: Type of fd (tells us what to do with events) > - * @fd: File descriptor number (implies < 2^24 total descriptors) > - * @flow: Index of the flow this fd is linked to > - * @tcp_listen: TCP-specific reference part for listening sockets > - * @udp: UDP-specific reference part > - * @icmp: ICMP-specific reference part > - * @data: Data handled by protocol handlers > - * @nsdir_fd: netns dirfd for fallback timer checking if namespace is gone > - * @queue: vhost-user queue index for this fd > - * @u64: Opaque reference for epoll_ctl() and epoll_wait() > - */ > -union epoll_ref { > - struct { > - enum epoll_type type:8; > -#define FD_REF_BITS 24 > -#define FD_REF_MAX ((int)MAX_FROM_BITS(FD_REF_BITS)) > - int32_t fd:FD_REF_BITS; > - union { > - uint32_t flow; > - flow_sidx_t flowside; > - union tcp_listen_epoll_ref tcp_listen; > - union udp_listen_epoll_ref udp; > - uint32_t data; > - int nsdir_fd; > - int queue; > - }; > - }; > - uint64_t u64; > -}; > -static_assert(sizeof(union epoll_ref) <= sizeof(union epoll_data), > - "epoll_ref must have same size as epoll_data"); > - > /* Large enough for ~128 maximum size frames */ > #define PKT_BUF_BYTES (8UL << 20) > > diff --git a/pasta.c b/pasta.c > index 687406b6e736..e8636f45df2f 100644 > --- a/pasta.c > +++ b/pasta.c > @@ -27,7 +27,6 @@ > #include > #include > #include > -#include > #include > #include > #include > @@ -49,6 +48,7 @@ > #include "isolation.h" > #include "netlink.h" > #include "log.h" > +#include "epoll_ctl.h" > > #define HOSTNAME_PREFIX "pasta-" > > @@ -444,7 +444,6 @@ static int pasta_netns_quit_timer(void) > */ > void pasta_netns_quit_init(const struct ctx *c) > { > - struct epoll_event ev = { .events = EPOLLIN }; > int flags = O_NONBLOCK | O_CLOEXEC; > struct statfs s = { 0 }; > bool try_inotify = true; > @@ -487,8 +486,8 @@ void pasta_netns_quit_init(const struct ctx *c) > die("netns monitor file number %i too big, exiting", fd); > > ref.fd = fd; > - ev.data.u64 = ref.u64; > - epoll_ctl(c->epollfd, EPOLL_CTL_ADD, fd, &ev); > + > + epoll_add(c->epollfd, EPOLLIN, ref); > } > > /** > diff --git a/repair.c b/repair.c > index f6b1bf36479c..69c530773173 100644 > --- a/repair.c > +++ b/repair.c > @@ -22,6 +22,7 @@ > #include "inany.h" > #include "flow.h" > #include "flow_table.h" > +#include "epoll_ctl.h" > > #include "repair.h" > > @@ -47,7 +48,6 @@ static int repair_nfds; > void repair_sock_init(const struct ctx *c) > { > union epoll_ref ref = { .type = EPOLL_TYPE_REPAIR_LISTEN }; > - struct epoll_event ev = { 0 }; > > if (c->fd_repair_listen == -1) > return; > @@ -58,10 +58,8 @@ void repair_sock_init(const struct ctx *c) > } > > ref.fd = c->fd_repair_listen; > - ev.events = EPOLLIN | EPOLLHUP | EPOLLET; > - ev.data.u64 = ref.u64; > - if (epoll_ctl(c->epollfd, EPOLL_CTL_ADD, c->fd_repair_listen, &ev)) > - err_perror("repair helper socket epoll_ctl(), won't migrate"); > + if (epoll_add(c->epollfd, EPOLLIN | EPOLLHUP | EPOLLET, ref)) > + err("repair helper socket epoll_ctl(), won't migrate"); > } > > /** > @@ -74,7 +72,6 @@ void repair_sock_init(const struct ctx *c) > int repair_listen_handler(struct ctx *c, uint32_t events) > { > union epoll_ref ref = { .type = EPOLL_TYPE_REPAIR }; > - struct epoll_event ev = { 0 }; > struct ucred ucred; > socklen_t len; > int rc; > @@ -112,11 +109,10 @@ int repair_listen_handler(struct ctx *c, uint32_t events) > info("Accepted TCP_REPAIR helper, PID %i", ucred.pid); > > ref.fd = c->fd_repair; > - ev.events = EPOLLHUP | EPOLLET; > - ev.data.u64 = ref.u64; > - if (epoll_ctl(c->epollfd, EPOLL_CTL_ADD, c->fd_repair, &ev)) { > - rc = errno; > - debug_perror("epoll_ctl() on TCP_REPAIR helper socket"); > + > + rc = epoll_add(c->epollfd, EPOLLHUP | EPOLLET, ref); > + if (rc < 0) { > + debug("epoll_ctl() on TCP_REPAIR helper socket"); > close(c->fd_repair); > c->fd_repair = -1; > return rc; > diff --git a/tap.c b/tap.c > index 9812f120d426..314c2aebd39d 100644 > --- a/tap.c > +++ b/tap.c > @@ -26,7 +26,6 @@ > #include > #include > #include > -#include > #include > #include > #include > @@ -61,6 +60,7 @@ > #include "log.h" > #include "vhost_user.h" > #include "vu_common.h" > +#include "epoll_ctl.h" > > /* Maximum allowed frame lengths (including L2 header) */ > > @@ -1327,14 +1327,12 @@ static void tap_backend_show_hints(struct ctx *c) > static void tap_sock_unix_init(const struct ctx *c) > { > union epoll_ref ref = { .type = EPOLL_TYPE_TAP_LISTEN }; > - struct epoll_event ev = { 0 }; > > listen(c->fd_tap_listen, 0); > > ref.fd = c->fd_tap_listen; > - ev.events = EPOLLIN | EPOLLET; > - ev.data.u64 = ref.u64; > - epoll_ctl(c->epollfd, EPOLL_CTL_ADD, c->fd_tap_listen, &ev); > + > + epoll_add(c->epollfd, EPOLLIN | EPOLLET, ref); > } > > /** > @@ -1343,7 +1341,6 @@ static void tap_sock_unix_init(const struct ctx *c) > */ > static void tap_start_connection(const struct ctx *c) > { > - struct epoll_event ev = { 0 }; > union epoll_ref ref = { 0 }; > > ref.fd = c->fd_tap; > @@ -1359,9 +1356,7 @@ static void tap_start_connection(const struct ctx *c) > break; > } > > - ev.events = EPOLLIN | EPOLLRDHUP; > - ev.data.u64 = ref.u64; > - epoll_ctl(c->epollfd, EPOLL_CTL_ADD, c->fd_tap, &ev); > + epoll_add(c->epollfd, EPOLLIN | EPOLLRDHUP, ref); > > if (c->ifi4) > arp_send_init_req(c); > diff --git a/tcp.c b/tcp.c > index 745353f782f5..db9f17c0622f 100644 > --- a/tcp.c > +++ b/tcp.c > @@ -279,7 +279,6 @@ > #include > #include > #include > -#include > #include > #include > #include > @@ -309,6 +308,7 @@ > #include "tcp_internal.h" > #include "tcp_buf.h" > #include "tcp_vu.h" > +#include "epoll_ctl.h" > > /* > * The size of TCP header (including options) is given by doff (Data Offset) > diff --git a/tcp_splice.c b/tcp_splice.c > index 666ee62b738f..6f21184bdc55 100644 > --- a/tcp_splice.c > +++ b/tcp_splice.c > @@ -44,7 +44,6 @@ > #include > #include > #include > -#include > #include > #include > > @@ -56,6 +55,7 @@ > #include "siphash.h" > #include "inany.h" > #include "flow.h" > +#include "epoll_ctl.h" > > #include "flow_table.h" > > diff --git a/udp.c b/udp.c > index 86585b7e0942..3812d5c2336f 100644 > --- a/udp.c > +++ b/udp.c > @@ -94,7 +94,6 @@ > #include > #include > #include > -#include > #include > #include > #include > @@ -115,6 +114,7 @@ > #include "flow_table.h" > #include "udp_internal.h" > #include "udp_vu.h" > +#include "epoll_ctl.h" > > #define UDP_MAX_FRAMES 32 /* max # of frames to receive at once */ > > diff --git a/udp_flow.c b/udp_flow.c > index 84973f807167..d9c75f1bb1d8 100644 > --- a/udp_flow.c > +++ b/udp_flow.c > @@ -15,6 +15,7 @@ > #include "passt.h" > #include "flow_table.h" > #include "udp_internal.h" > +#include "epoll_ctl.h" > > #define UDP_CONN_TIMEOUT 180 /* s, timeout for ephemeral or local bind */ > > diff --git a/util.c b/util.c > index 1067486be414..e3f24f7b7e47 100644 > --- a/util.c > +++ b/util.c > @@ -18,7 +18,6 @@ > #include > #include > #include > -#include > #include > #include > #include > @@ -35,6 +34,7 @@ > #include "packet.h" > #include "log.h" > #include "pcap.h" > +#include "epoll_ctl.h" > #ifdef HAS_GETRANDOM > #include > #endif > @@ -58,7 +58,6 @@ int sock_l4_sa(const struct ctx *c, enum epoll_type type, > sa_family_t af = ((const struct sockaddr *)sa)->sa_family; > union epoll_ref ref = { .type = type, .data = data }; > bool freebind = false; > - struct epoll_event ev; > int fd, y = 1, ret; > uint8_t proto; > int socktype; > @@ -172,13 +171,9 @@ int sock_l4_sa(const struct ctx *c, enum epoll_type type, > return ret; > } > > - ev.events = EPOLLIN; > - ev.data.u64 = ref.u64; > - if (epoll_ctl(c->epollfd, EPOLL_CTL_ADD, fd, &ev) == -1) { > - ret = -errno; > - warn("L4 epoll_ctl: %s", strerror_(-ret)); > + ret = epoll_add(c->epollfd, EPOLLIN, ref); > + if (ret < 0) > return ret; > - } > > return fd; > } > @@ -994,17 +989,6 @@ void raw_random(void *buf, size_t buflen) > die("Unexpected EOF on random data source"); > } > > -/** > - * epoll_del() - Remove a file descriptor from our passt epoll > - * @epollfd: epoll file descriptor to remove from > - * @fd: File descriptor to remove > - */ > -void epoll_del(int epollfd, int fd) > -{ > - epoll_ctl(epollfd, EPOLL_CTL_DEL, fd, NULL); > - > -} > - > /** > * encode_domain_name() - Encode domain name according to RFC 1035, section 3.1 > * @buf: Buffer to fill in with encoded domain name > diff --git a/util.h b/util.h > index c61cbef357aa..8e4b4c5c6032 100644 > --- a/util.h > +++ b/util.h > @@ -193,6 +193,9 @@ int do_clone(int (*fn)(void *), char *stack_area, size_t stack_size, int flags, > #define SNDBUF_BIG (4ULL * 1024 * 1024) > #define SNDBUF_SMALL (128ULL * 1024) > > +#define FD_REF_BITS 24 > +#define FD_REF_MAX ((int)MAX_FROM_BITS(FD_REF_BITS)) > + > #include > #include > #include > @@ -300,7 +303,6 @@ static inline bool mod_between(unsigned x, unsigned i, unsigned j, unsigned m) > #define FPRINTF(f, ...) (void)fprintf(f, __VA_ARGS__) > > void raw_random(void *buf, size_t buflen); > -void epoll_del(int epollfd, int fd); > > /* > * Starting from glibc 2.40.9000 and commit 25a5eb4010df ("string: strerror, > diff --git a/vhost_user.c b/vhost_user.c > index f8324c59cc6c..aa7c869d9e56 100644 > --- a/vhost_user.c > +++ b/vhost_user.c > @@ -32,8 +32,6 @@ > #include > #include > #include > -#include > -#include > #include > #include > #include > @@ -45,6 +43,7 @@ > #include "vhost_user.h" > #include "pcap.h" > #include "migrate.h" > +#include "epoll_ctl.h" > > /* vhost-user version we are compatible with */ > #define VHOST_USER_VERSION 1 > @@ -753,11 +752,8 @@ static void vu_set_watch(const struct vu_dev *vdev, int idx) > .fd = vdev->vq[idx].kick_fd, > .queue = idx > }; > - struct epoll_event ev = { 0 }; > > - ev.data.u64 = ref.u64; > - ev.events = EPOLLIN; > - epoll_ctl(vdev->context->epollfd, EPOLL_CTL_ADD, ref.fd, &ev); > + epoll_add(vdev->context->epollfd, EPOLLIN, ref); > } > > /** > diff --git a/vu_common.c b/vu_common.c > index b716070ea3c3..b13b7c308fd8 100644 > --- a/vu_common.c > +++ b/vu_common.c > @@ -6,7 +6,6 @@ > */ > > #include > -#include > #include > #include > #include > @@ -19,6 +18,7 @@ > #include "pcap.h" > #include "vu_common.h" > #include "migrate.h" > +#include "epoll_ctl.h" > > #define VU_MAX_TX_BUFFER_NB 2 > > -- > 2.51.0 > -- David Gibson (he or they) | I'll have my music baroque, and my code david AT gibson.dropbear.id.au | minimalist, thank you, not the other way | around. http://www.ozlabs.org/~dgibson