public inbox for passt-dev@passt.top
 help / color / mirror / code / Atom feed
From: David Gibson <david@gibson.dropbear.id.au>
To: Laurent Vivier <lvivier@redhat.com>
Cc: passt-dev@passt.top
Subject: Re: [PATCH v3 3/6] util: Move epoll registration out of sock_l4_sa()
Date: Mon, 13 Oct 2025 15:47:23 +1100	[thread overview]
Message-ID: <aOyEW0uxHXBQooij@zatzit> (raw)
In-Reply-To: <20251009130409.3931795-4-lvivier@redhat.com>

[-- Attachment #1: Type: text/plain, Size: 9409 bytes --]

On Thu, Oct 09, 2025 at 03:04:06PM +0200, Laurent Vivier wrote:
> Move epoll_add() calls from sock_l4_sa() to the protocol-specific code
> (icmp.c, pif.c, udp_flow.c) to give callers more control over epoll
> registration. This allows sock_l4_sa() to focus solely on socket
> creation and binding, while epoll management happens at a higher level.
> 
> Remove the data parameter from sock_l4_sa() and flowside_sock_l4() as
> it's no longer needed - callers now construct the full epoll_ref and
> register the socket themselves after creation.
> 
> Signed-off-by: Laurent Vivier <lvivier@redhat.com>

Reviewed-by: David Gibson <david@gibson.dropbear.id.au>

> ---
>  flow.c     | 10 ++++------
>  flow.h     |  2 +-
>  icmp.c     | 15 +++++++++++----
>  pif.c      | 32 +++++++++++++++++++++++++-------
>  udp_flow.c | 16 ++++++++++++++--
>  util.c     | 10 +---------
>  util.h     |  2 +-
>  7 files changed, 57 insertions(+), 30 deletions(-)
> 
> diff --git a/flow.c b/flow.c
> index feefda3ce74e..b14e9d8b63ff 100644
> --- a/flow.c
> +++ b/flow.c
> @@ -163,7 +163,6 @@ static void flowside_from_af(struct flowside *side, sa_family_t af,
>   * @type:	Socket epoll type
>   * @sa:		Socket address
>   * @sl:		Length of @sa
> - * @data:	epoll reference data
>   */
>  struct flowside_sock_args {
>  	const struct ctx *c;
> @@ -173,7 +172,6 @@ struct flowside_sock_args {
>  	const struct sockaddr *sa;
>  	socklen_t sl;
>  	const char *path;
> -	uint32_t data;
>  };
>  
>  /** flowside_sock_splice() - Create and bind socket for PIF_SPLICE based on flowside
> @@ -188,7 +186,7 @@ static int flowside_sock_splice(void *arg)
>  	ns_enter(a->c);
>  
>  	a->fd = sock_l4_sa(a->c, a->type, a->sa, a->sl, NULL,
> -	                   a->sa->sa_family == AF_INET6, a->data);
> +	                   a->sa->sa_family == AF_INET6);
>  	a->err = errno;
>  
>  	return 0;
> @@ -205,7 +203,7 @@ static int flowside_sock_splice(void *arg)
>   *         (if specified).
>   */
>  int flowside_sock_l4(const struct ctx *c, enum epoll_type type, uint8_t pif,
> -		     const struct flowside *tgt, uint32_t data)
> +		     const struct flowside *tgt)
>  {
>  	const char *ifname = NULL;
>  	union sockaddr_inany sa;
> @@ -225,12 +223,12 @@ int flowside_sock_l4(const struct ctx *c, enum epoll_type type, uint8_t pif,
>  			ifname = c->ip6.ifname_out;
>  
>  		return sock_l4_sa(c, type, &sa, sl, ifname,
> -				  sa.sa_family == AF_INET6, data);
> +				  sa.sa_family == AF_INET6);
>  
>  	case PIF_SPLICE: {
>  		struct flowside_sock_args args = {
>  			.c = c, .type = type,
> -			.sa = &sa.sa, .sl = sl, .data = data,
> +			.sa = &sa.sa, .sl = sl,
>  		};
>  		NS_CALL(flowside_sock_splice, &args);
>  		errno = args.err;
> diff --git a/flow.h b/flow.h
> index cac618ad0ca1..ef138b83add8 100644
> --- a/flow.h
> +++ b/flow.h
> @@ -167,7 +167,7 @@ static inline bool flowside_eq(const struct flowside *left,
>  }
>  
>  int flowside_sock_l4(const struct ctx *c, enum epoll_type type, uint8_t pif,
> -		     const struct flowside *tgt, uint32_t data);
> +		     const struct flowside *tgt);
>  int flowside_connect(const struct ctx *c, int s,
>  		     uint8_t pif, const struct flowside *tgt);
>  
> diff --git a/icmp.c b/icmp.c
> index c26561da80bf..56dfac6c958e 100644
> --- a/icmp.c
> +++ b/icmp.c
> @@ -170,10 +170,10 @@ static struct icmp_ping_flow *icmp_ping_new(const struct ctx *c,
>  {
>  	uint8_t proto = af == AF_INET ? IPPROTO_ICMP : IPPROTO_ICMPV6;
>  	uint8_t flowtype = af == AF_INET ? FLOW_PING4 : FLOW_PING6;
> -	union epoll_ref ref = { .type = EPOLL_TYPE_PING };
>  	union flow *flow = flow_alloc();
>  	struct icmp_ping_flow *pingf;
>  	const struct flowside *tgt;
> +	union epoll_ref ref;
>  
>  	if (!flow)
>  		return NULL;
> @@ -194,9 +194,7 @@ static struct icmp_ping_flow *icmp_ping_new(const struct ctx *c,
>  
>  	pingf->seq = -1;
>  
> -	ref.flowside = FLOW_SIDX(flow, TGTSIDE);
> -	pingf->sock = flowside_sock_l4(c, EPOLL_TYPE_PING, PIF_HOST,
> -				       tgt, ref.data);
> +	pingf->sock = flowside_sock_l4(c, EPOLL_TYPE_PING, PIF_HOST, tgt);
>  
>  	if (pingf->sock < 0) {
>  		warn("Cannot open \"ping\" socket. You might need to:");
> @@ -208,6 +206,15 @@ static struct icmp_ping_flow *icmp_ping_new(const struct ctx *c,
>  	if (pingf->sock > FD_REF_MAX)
>  		goto cancel;
>  
> +	ref.type = EPOLL_TYPE_PING;
> +	ref.flowside = FLOW_SIDX(flow, TGTSIDE);
> +	ref.fd = pingf->sock;
> +
> +	if (epoll_add(c->epollfd, EPOLLIN, &ref) < 0) {
> +		close(pingf->sock);
> +		goto cancel;
> +	}
> +
>  	flow_dbg(pingf, "new socket %i for echo ID %"PRIu16, pingf->sock, id);
>  
>  	flow_hash_insert(c, FLOW_SIDX(pingf, INISIDE));
> diff --git a/pif.c b/pif.c
> index 592fafaab58a..17b41705183c 100644
> --- a/pif.c
> +++ b/pif.c
> @@ -5,6 +5,7 @@
>   * Passt/pasta interface types and IDs
>   */
>  
> +#include <errno.h>
>  #include <stdint.h>
>  #include <assert.h>
>  #include <netinet/in.h>
> @@ -14,7 +15,7 @@
>  #include "siphash.h"
>  #include "ip.h"
>  #include "inany.h"
> -#include "passt.h"
> +#include "epoll_ctl.h"
>  
>  const char *pif_type_str[] = {
>  	[PIF_NONE]		= "<none>",
> @@ -83,7 +84,9 @@ int pif_sock_l4(const struct ctx *c, enum epoll_type type, uint8_t pif,
>  		.sa6.sin6_addr = in6addr_any,
>  		.sa6.sin6_port = htons(port),
>  	};
> +	union epoll_ref ref;
>  	socklen_t sl;
> +	int ret;
>  
>  	ASSERT(pif_is_socket(pif));
>  
> @@ -93,11 +96,26 @@ int pif_sock_l4(const struct ctx *c, enum epoll_type type, uint8_t pif,
>  		ASSERT(addr && inany_is_loopback(addr));
>  	}
>  
> -	if (!addr)
> -		return sock_l4_sa(c, type, &sa, sizeof(sa.sa6),
> -				  ifname, false, data);
> +	if (!addr) {
> +		ref.fd = sock_l4_sa(c, type, &sa, sizeof(sa.sa6),
> +				    ifname, false);
> +	} else {
> +		pif_sockaddr(c, &sa, &sl, pif, addr, port);
> +		ref.fd = sock_l4_sa(c, type, &sa, sl,
> +				    ifname, sa.sa_family == AF_INET6);
> +	}
> +
> +	if (ref.fd < 0)
> +		return ref.fd;
> +
> +	ref.type = type;
> +	ref.data = data;
> +
> +	ret = epoll_add(c->epollfd, EPOLLIN, &ref);
> +	if (ret < 0) {
> +		close(ref.fd);
> +		return ret;
> +	}
>  
> -	pif_sockaddr(c, &sa, &sl, pif, addr, port);
> -	return sock_l4_sa(c, type, &sa, sl,
> -			  ifname, sa.sa_family == AF_INET6, data);
> +	return ref.fd;
>  }
> diff --git a/udp_flow.c b/udp_flow.c
> index d9c75f1bb1d8..caaf3dababb1 100644
> --- a/udp_flow.c
> +++ b/udp_flow.c
> @@ -78,16 +78,28 @@ static int udp_flow_sock(const struct ctx *c,
>  		flow_sidx_t sidx;
>  		uint32_t data;
>  	} fref = { .sidx = FLOW_SIDX(uflow, sidei) };
> +	union epoll_ref ref;
> +	int rc;
>  	int s;
>  
> -	s = flowside_sock_l4(c, EPOLL_TYPE_UDP, pif, side, fref.data);
> +	s = flowside_sock_l4(c, EPOLL_TYPE_UDP, pif, side);
>  	if (s < 0) {
>  		flow_dbg_perror(uflow, "Couldn't open flow specific socket");
>  		return s;
>  	}
>  
> +	ref.type = EPOLL_TYPE_UDP;
> +	ref.data = fref.data;
> +	ref.fd = s;
> +
> +	rc = epoll_add(c->epollfd, EPOLLIN, &ref);
> +	if (rc < 0) {
> +		close(s);
> +		return rc;
> +	}
> +
>  	if (flowside_connect(c, s, pif, side) < 0) {
> -		int rc = -errno;
> +		rc = -errno;
>  
>  		epoll_del(c->epollfd, s);
>  		close(s);
> diff --git a/util.c b/util.c
> index b2490123590a..707ab3388d13 100644
> --- a/util.c
> +++ b/util.c
> @@ -47,16 +47,14 @@
>   * @sl:		Length of @sa
>   * @ifname:	Interface for binding, NULL for any
>   * @v6only:	Set IPV6_V6ONLY socket option
> - * @data:	epoll reference portion for protocol handlers
>   *
>   * Return: newly created socket, negative error code on failure
>   */
>  int sock_l4_sa(const struct ctx *c, enum epoll_type type,
>  	       const void *sa, socklen_t sl,
> -	       const char *ifname, bool v6only, uint32_t data)
> +	       const char *ifname, bool v6only)
>  {
>  	sa_family_t af = ((const struct sockaddr *)sa)->sa_family;
> -	union epoll_ref ref = { .type = type, .data = data };
>  	bool freebind = false;
>  	int fd, y = 1, ret;
>  	uint8_t proto;
> @@ -99,8 +97,6 @@ int sock_l4_sa(const struct ctx *c, enum epoll_type type,
>  		return -EBADF;
>  	}
>  
> -	ref.fd = fd;
> -
>  	if (v6only)
>  		if (setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, &y, sizeof(y)))
>  			debug("Failed to set IPV6_V6ONLY on socket %i", fd);
> @@ -171,10 +167,6 @@ int sock_l4_sa(const struct ctx *c, enum epoll_type type,
>  		return ret;
>  	}
>  
> -	ret = epoll_add(c->epollfd, EPOLLIN, &ref);
> -	if (ret < 0)
> -		return ret;
> -
>  	return fd;
>  }
>  
> diff --git a/util.h b/util.h
> index 8e4b4c5c6032..9108f8f19e14 100644
> --- a/util.h
> +++ b/util.h
> @@ -207,7 +207,7 @@ struct ctx;
>  
>  int sock_l4_sa(const struct ctx *c, enum epoll_type type,
>  	       const void *sa, socklen_t sl,
> -	       const char *ifname, bool v6only, uint32_t data);
> +	       const char *ifname, bool v6only);
>  int sock_unix(char *sock_path);
>  void sock_probe_mem(struct ctx *c);
>  long timespec_diff_ms(const struct timespec *a, const struct timespec *b);
> -- 
> 2.50.1
> 

-- 
David Gibson (he or they)	| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you, not the other way
				| around.
http://www.ozlabs.org/~dgibson

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

  reply	other threads:[~2025-10-13  4:47 UTC|newest]

Thread overview: 13+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-10-09 13:04 [PATCH v3 0/6] Refactor epoll handling in preparation for multithreading Laurent Vivier
2025-10-09 13:04 ` [PATCH v3 1/6] util: Simplify epoll_del() interface to take epollfd directly Laurent Vivier
2025-10-16 21:31   ` Stefano Brivio
2025-10-09 13:04 ` [PATCH v3 2/6] epoll_ctl: Extract epoll operations Laurent Vivier
2025-10-13  4:28   ` David Gibson
2025-10-16 21:31   ` Stefano Brivio
2025-10-16 23:07     ` David Gibson
2025-10-09 13:04 ` [PATCH v3 3/6] util: Move epoll registration out of sock_l4_sa() Laurent Vivier
2025-10-13  4:47   ` David Gibson [this message]
2025-10-09 13:04 ` [PATCH v3 4/6] tcp, flow: Replace per-connection in_epoll flag with threadnb in flow_common Laurent Vivier
2025-10-15  3:54   ` David Gibson
2025-10-09 13:04 ` [PATCH v3 5/6] icmp: Use thread-based epoll management for ICMP flows Laurent Vivier
2025-10-09 13:04 ` [PATCH v3 6/6] udp: Use thread-based epoll management for UDP flows Laurent Vivier

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=aOyEW0uxHXBQooij@zatzit \
    --to=david@gibson.dropbear.id.au \
    --cc=lvivier@redhat.com \
    --cc=passt-dev@passt.top \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
Code repositories for project(s) associated with this public inbox

	https://passt.top/passt

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for IMAP folder(s).