From: David Gibson <david@gibson.dropbear.id.au>
To: Laurent Vivier <lvivier@redhat.com>
Cc: passt-dev@passt.top
Subject: Re: [PATCH v3 3/6] util: Move epoll registration out of sock_l4_sa()
Date: Mon, 13 Oct 2025 15:47:23 +1100 [thread overview]
Message-ID: <aOyEW0uxHXBQooij@zatzit> (raw)
In-Reply-To: <20251009130409.3931795-4-lvivier@redhat.com>
[-- Attachment #1: Type: text/plain, Size: 9409 bytes --]
On Thu, Oct 09, 2025 at 03:04:06PM +0200, Laurent Vivier wrote:
> Move epoll_add() calls from sock_l4_sa() to the protocol-specific code
> (icmp.c, pif.c, udp_flow.c) to give callers more control over epoll
> registration. This allows sock_l4_sa() to focus solely on socket
> creation and binding, while epoll management happens at a higher level.
>
> Remove the data parameter from sock_l4_sa() and flowside_sock_l4() as
> it's no longer needed - callers now construct the full epoll_ref and
> register the socket themselves after creation.
>
> Signed-off-by: Laurent Vivier <lvivier@redhat.com>
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
> ---
> flow.c | 10 ++++------
> flow.h | 2 +-
> icmp.c | 15 +++++++++++----
> pif.c | 32 +++++++++++++++++++++++++-------
> udp_flow.c | 16 ++++++++++++++--
> util.c | 10 +---------
> util.h | 2 +-
> 7 files changed, 57 insertions(+), 30 deletions(-)
>
> diff --git a/flow.c b/flow.c
> index feefda3ce74e..b14e9d8b63ff 100644
> --- a/flow.c
> +++ b/flow.c
> @@ -163,7 +163,6 @@ static void flowside_from_af(struct flowside *side, sa_family_t af,
> * @type: Socket epoll type
> * @sa: Socket address
> * @sl: Length of @sa
> - * @data: epoll reference data
> */
> struct flowside_sock_args {
> const struct ctx *c;
> @@ -173,7 +172,6 @@ struct flowside_sock_args {
> const struct sockaddr *sa;
> socklen_t sl;
> const char *path;
> - uint32_t data;
> };
>
> /** flowside_sock_splice() - Create and bind socket for PIF_SPLICE based on flowside
> @@ -188,7 +186,7 @@ static int flowside_sock_splice(void *arg)
> ns_enter(a->c);
>
> a->fd = sock_l4_sa(a->c, a->type, a->sa, a->sl, NULL,
> - a->sa->sa_family == AF_INET6, a->data);
> + a->sa->sa_family == AF_INET6);
> a->err = errno;
>
> return 0;
> @@ -205,7 +203,7 @@ static int flowside_sock_splice(void *arg)
> * (if specified).
> */
> int flowside_sock_l4(const struct ctx *c, enum epoll_type type, uint8_t pif,
> - const struct flowside *tgt, uint32_t data)
> + const struct flowside *tgt)
> {
> const char *ifname = NULL;
> union sockaddr_inany sa;
> @@ -225,12 +223,12 @@ int flowside_sock_l4(const struct ctx *c, enum epoll_type type, uint8_t pif,
> ifname = c->ip6.ifname_out;
>
> return sock_l4_sa(c, type, &sa, sl, ifname,
> - sa.sa_family == AF_INET6, data);
> + sa.sa_family == AF_INET6);
>
> case PIF_SPLICE: {
> struct flowside_sock_args args = {
> .c = c, .type = type,
> - .sa = &sa.sa, .sl = sl, .data = data,
> + .sa = &sa.sa, .sl = sl,
> };
> NS_CALL(flowside_sock_splice, &args);
> errno = args.err;
> diff --git a/flow.h b/flow.h
> index cac618ad0ca1..ef138b83add8 100644
> --- a/flow.h
> +++ b/flow.h
> @@ -167,7 +167,7 @@ static inline bool flowside_eq(const struct flowside *left,
> }
>
> int flowside_sock_l4(const struct ctx *c, enum epoll_type type, uint8_t pif,
> - const struct flowside *tgt, uint32_t data);
> + const struct flowside *tgt);
> int flowside_connect(const struct ctx *c, int s,
> uint8_t pif, const struct flowside *tgt);
>
> diff --git a/icmp.c b/icmp.c
> index c26561da80bf..56dfac6c958e 100644
> --- a/icmp.c
> +++ b/icmp.c
> @@ -170,10 +170,10 @@ static struct icmp_ping_flow *icmp_ping_new(const struct ctx *c,
> {
> uint8_t proto = af == AF_INET ? IPPROTO_ICMP : IPPROTO_ICMPV6;
> uint8_t flowtype = af == AF_INET ? FLOW_PING4 : FLOW_PING6;
> - union epoll_ref ref = { .type = EPOLL_TYPE_PING };
> union flow *flow = flow_alloc();
> struct icmp_ping_flow *pingf;
> const struct flowside *tgt;
> + union epoll_ref ref;
>
> if (!flow)
> return NULL;
> @@ -194,9 +194,7 @@ static struct icmp_ping_flow *icmp_ping_new(const struct ctx *c,
>
> pingf->seq = -1;
>
> - ref.flowside = FLOW_SIDX(flow, TGTSIDE);
> - pingf->sock = flowside_sock_l4(c, EPOLL_TYPE_PING, PIF_HOST,
> - tgt, ref.data);
> + pingf->sock = flowside_sock_l4(c, EPOLL_TYPE_PING, PIF_HOST, tgt);
>
> if (pingf->sock < 0) {
> warn("Cannot open \"ping\" socket. You might need to:");
> @@ -208,6 +206,15 @@ static struct icmp_ping_flow *icmp_ping_new(const struct ctx *c,
> if (pingf->sock > FD_REF_MAX)
> goto cancel;
>
> + ref.type = EPOLL_TYPE_PING;
> + ref.flowside = FLOW_SIDX(flow, TGTSIDE);
> + ref.fd = pingf->sock;
> +
> + if (epoll_add(c->epollfd, EPOLLIN, &ref) < 0) {
> + close(pingf->sock);
> + goto cancel;
> + }
> +
> flow_dbg(pingf, "new socket %i for echo ID %"PRIu16, pingf->sock, id);
>
> flow_hash_insert(c, FLOW_SIDX(pingf, INISIDE));
> diff --git a/pif.c b/pif.c
> index 592fafaab58a..17b41705183c 100644
> --- a/pif.c
> +++ b/pif.c
> @@ -5,6 +5,7 @@
> * Passt/pasta interface types and IDs
> */
>
> +#include <errno.h>
> #include <stdint.h>
> #include <assert.h>
> #include <netinet/in.h>
> @@ -14,7 +15,7 @@
> #include "siphash.h"
> #include "ip.h"
> #include "inany.h"
> -#include "passt.h"
> +#include "epoll_ctl.h"
>
> const char *pif_type_str[] = {
> [PIF_NONE] = "<none>",
> @@ -83,7 +84,9 @@ int pif_sock_l4(const struct ctx *c, enum epoll_type type, uint8_t pif,
> .sa6.sin6_addr = in6addr_any,
> .sa6.sin6_port = htons(port),
> };
> + union epoll_ref ref;
> socklen_t sl;
> + int ret;
>
> ASSERT(pif_is_socket(pif));
>
> @@ -93,11 +96,26 @@ int pif_sock_l4(const struct ctx *c, enum epoll_type type, uint8_t pif,
> ASSERT(addr && inany_is_loopback(addr));
> }
>
> - if (!addr)
> - return sock_l4_sa(c, type, &sa, sizeof(sa.sa6),
> - ifname, false, data);
> + if (!addr) {
> + ref.fd = sock_l4_sa(c, type, &sa, sizeof(sa.sa6),
> + ifname, false);
> + } else {
> + pif_sockaddr(c, &sa, &sl, pif, addr, port);
> + ref.fd = sock_l4_sa(c, type, &sa, sl,
> + ifname, sa.sa_family == AF_INET6);
> + }
> +
> + if (ref.fd < 0)
> + return ref.fd;
> +
> + ref.type = type;
> + ref.data = data;
> +
> + ret = epoll_add(c->epollfd, EPOLLIN, &ref);
> + if (ret < 0) {
> + close(ref.fd);
> + return ret;
> + }
>
> - pif_sockaddr(c, &sa, &sl, pif, addr, port);
> - return sock_l4_sa(c, type, &sa, sl,
> - ifname, sa.sa_family == AF_INET6, data);
> + return ref.fd;
> }
> diff --git a/udp_flow.c b/udp_flow.c
> index d9c75f1bb1d8..caaf3dababb1 100644
> --- a/udp_flow.c
> +++ b/udp_flow.c
> @@ -78,16 +78,28 @@ static int udp_flow_sock(const struct ctx *c,
> flow_sidx_t sidx;
> uint32_t data;
> } fref = { .sidx = FLOW_SIDX(uflow, sidei) };
> + union epoll_ref ref;
> + int rc;
> int s;
>
> - s = flowside_sock_l4(c, EPOLL_TYPE_UDP, pif, side, fref.data);
> + s = flowside_sock_l4(c, EPOLL_TYPE_UDP, pif, side);
> if (s < 0) {
> flow_dbg_perror(uflow, "Couldn't open flow specific socket");
> return s;
> }
>
> + ref.type = EPOLL_TYPE_UDP;
> + ref.data = fref.data;
> + ref.fd = s;
> +
> + rc = epoll_add(c->epollfd, EPOLLIN, &ref);
> + if (rc < 0) {
> + close(s);
> + return rc;
> + }
> +
> if (flowside_connect(c, s, pif, side) < 0) {
> - int rc = -errno;
> + rc = -errno;
>
> epoll_del(c->epollfd, s);
> close(s);
> diff --git a/util.c b/util.c
> index b2490123590a..707ab3388d13 100644
> --- a/util.c
> +++ b/util.c
> @@ -47,16 +47,14 @@
> * @sl: Length of @sa
> * @ifname: Interface for binding, NULL for any
> * @v6only: Set IPV6_V6ONLY socket option
> - * @data: epoll reference portion for protocol handlers
> *
> * Return: newly created socket, negative error code on failure
> */
> int sock_l4_sa(const struct ctx *c, enum epoll_type type,
> const void *sa, socklen_t sl,
> - const char *ifname, bool v6only, uint32_t data)
> + const char *ifname, bool v6only)
> {
> sa_family_t af = ((const struct sockaddr *)sa)->sa_family;
> - union epoll_ref ref = { .type = type, .data = data };
> bool freebind = false;
> int fd, y = 1, ret;
> uint8_t proto;
> @@ -99,8 +97,6 @@ int sock_l4_sa(const struct ctx *c, enum epoll_type type,
> return -EBADF;
> }
>
> - ref.fd = fd;
> -
> if (v6only)
> if (setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, &y, sizeof(y)))
> debug("Failed to set IPV6_V6ONLY on socket %i", fd);
> @@ -171,10 +167,6 @@ int sock_l4_sa(const struct ctx *c, enum epoll_type type,
> return ret;
> }
>
> - ret = epoll_add(c->epollfd, EPOLLIN, &ref);
> - if (ret < 0)
> - return ret;
> -
> return fd;
> }
>
> diff --git a/util.h b/util.h
> index 8e4b4c5c6032..9108f8f19e14 100644
> --- a/util.h
> +++ b/util.h
> @@ -207,7 +207,7 @@ struct ctx;
>
> int sock_l4_sa(const struct ctx *c, enum epoll_type type,
> const void *sa, socklen_t sl,
> - const char *ifname, bool v6only, uint32_t data);
> + const char *ifname, bool v6only);
> int sock_unix(char *sock_path);
> void sock_probe_mem(struct ctx *c);
> long timespec_diff_ms(const struct timespec *a, const struct timespec *b);
> --
> 2.50.1
>
--
David Gibson (he or they) | I'll have my music baroque, and my code
david AT gibson.dropbear.id.au | minimalist, thank you, not the other way
| around.
http://www.ozlabs.org/~dgibson
[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]
next prev parent reply other threads:[~2025-10-13 4:47 UTC|newest]
Thread overview: 13+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-10-09 13:04 [PATCH v3 0/6] Refactor epoll handling in preparation for multithreading Laurent Vivier
2025-10-09 13:04 ` [PATCH v3 1/6] util: Simplify epoll_del() interface to take epollfd directly Laurent Vivier
2025-10-16 21:31 ` Stefano Brivio
2025-10-09 13:04 ` [PATCH v3 2/6] epoll_ctl: Extract epoll operations Laurent Vivier
2025-10-13 4:28 ` David Gibson
2025-10-16 21:31 ` Stefano Brivio
2025-10-16 23:07 ` David Gibson
2025-10-09 13:04 ` [PATCH v3 3/6] util: Move epoll registration out of sock_l4_sa() Laurent Vivier
2025-10-13 4:47 ` David Gibson [this message]
2025-10-09 13:04 ` [PATCH v3 4/6] tcp, flow: Replace per-connection in_epoll flag with threadnb in flow_common Laurent Vivier
2025-10-15 3:54 ` David Gibson
2025-10-09 13:04 ` [PATCH v3 5/6] icmp: Use thread-based epoll management for ICMP flows Laurent Vivier
2025-10-09 13:04 ` [PATCH v3 6/6] udp: Use thread-based epoll management for UDP flows Laurent Vivier
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=aOyEW0uxHXBQooij@zatzit \
--to=david@gibson.dropbear.id.au \
--cc=lvivier@redhat.com \
--cc=passt-dev@passt.top \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
Code repositories for project(s) associated with this public inbox
https://passt.top/passt
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for IMAP folder(s).