public inbox for passt-dev@passt.top
 help / color / mirror / code / Atom feed
From: David Gibson <david@gibson.dropbear.id.au>
To: passt-dev@passt.top, Stefano Brivio <sbrivio@redhat.com>
Cc: David Gibson <david@gibson.dropbear.id.au>
Subject: [PATCH v2 01/11] util: sock_l4() determine protocol from epoll type rather than the reverse
Date: Fri,  5 Jul 2024 20:43:59 +1000	[thread overview]
Message-ID: <20240705104409.3847002-2-david@gibson.dropbear.id.au> (raw)
In-Reply-To: <20240705104409.3847002-1-david@gibson.dropbear.id.au>

sock_l4() creates a socket of the given IP protocol number, and adds it to
the epoll state.  Currently it determines the correct tag for the epoll
data based on the protocol.  However, we have some future cases where we
might want different semantics, and therefore epoll types, for sockets of
the same protocol.  So, change sock_l4() to take the epoll type as an
explicit parameter, and determine the protocol from that.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
---
 epoll_type.h | 41 +++++++++++++++++++++++++++++++++++++++++
 icmp.c       |  2 +-
 passt.h      | 32 --------------------------------
 tcp.c        | 10 +++++-----
 udp.c        | 12 ++++++------
 util.c       | 48 ++++++++++++++++++++++++++----------------------
 util.h       |  3 ++-
 7 files changed, 81 insertions(+), 67 deletions(-)
 create mode 100644 epoll_type.h

diff --git a/epoll_type.h b/epoll_type.h
new file mode 100644
index 00000000..b6c04199
--- /dev/null
+++ b/epoll_type.h
@@ -0,0 +1,41 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later
+ * Copyright Red Hat
+ * Author: David Gibson <david@gibson.dropbear.id.au>
+ */
+
+#ifndef EPOLL_TYPE_H
+#define EPOLL_TYPE_H
+
+/**
+ * enum epoll_type - Different types of fds we poll over
+ */
+enum epoll_type {
+	/* Special value to indicate an invalid type */
+	EPOLL_TYPE_NONE = 0,
+	/* Connected TCP sockets */
+	EPOLL_TYPE_TCP,
+	/* Connected TCP sockets (spliced) */
+	EPOLL_TYPE_TCP_SPLICE,
+	/* Listening TCP sockets */
+	EPOLL_TYPE_TCP_LISTEN,
+	/* timerfds used for TCP timers */
+	EPOLL_TYPE_TCP_TIMER,
+	/* UDP sockets */
+	EPOLL_TYPE_UDP,
+	/* ICMP/ICMPv6 ping sockets */
+	EPOLL_TYPE_PING,
+	/* inotify fd watching for end of netns (pasta) */
+	EPOLL_TYPE_NSQUIT_INOTIFY,
+	/* timer fd watching for end of netns, fallback for inotify (pasta) */
+	EPOLL_TYPE_NSQUIT_TIMER,
+	/* tuntap character device */
+	EPOLL_TYPE_TAP_PASTA,
+	/* socket connected to qemu  */
+	EPOLL_TYPE_TAP_PASST,
+	/* socket listening for qemu socket connections */
+	EPOLL_TYPE_TAP_LISTEN,
+
+	EPOLL_NUM_TYPES,
+};
+
+#endif /* EPOLL_TYPE_H */
diff --git a/icmp.c b/icmp.c
index 80330f6f..d4ccc722 100644
--- a/icmp.c
+++ b/icmp.c
@@ -179,7 +179,7 @@ static struct icmp_ping_flow *icmp_ping_new(const struct ctx *c,
 	}
 
 	ref.flowside = FLOW_SIDX(flow, TGTSIDE);
-	pingf->sock = sock_l4(c, af, flow_proto[flowtype], bind_addr, bind_if,
+	pingf->sock = sock_l4(c, af, EPOLL_TYPE_PING, bind_addr, bind_if,
 			      0, ref.data);
 
 	if (pingf->sock < 0) {
diff --git a/passt.h b/passt.h
index 21cf4c15..867e77b7 100644
--- a/passt.h
+++ b/passt.h
@@ -23,38 +23,6 @@ union epoll_ref;
 #include "tcp.h"
 #include "udp.h"
 
-/**
- * enum epoll_type - Different types of fds we poll over
- */
-enum epoll_type {
-	/* Special value to indicate an invalid type */
-	EPOLL_TYPE_NONE = 0,
-	/* Connected TCP sockets */
-	EPOLL_TYPE_TCP,
-	/* Connected TCP sockets (spliced) */
-	EPOLL_TYPE_TCP_SPLICE,
-	/* Listening TCP sockets */
-	EPOLL_TYPE_TCP_LISTEN,
-	/* timerfds used for TCP timers */
-	EPOLL_TYPE_TCP_TIMER,
-	/* UDP sockets */
-	EPOLL_TYPE_UDP,
-	/* ICMP/ICMPv6 ping sockets */
-	EPOLL_TYPE_PING,
-	/* inotify fd watching for end of netns (pasta) */
-	EPOLL_TYPE_NSQUIT_INOTIFY,
-	/* timer fd watching for end of netns, fallback for inotify (pasta) */
-	EPOLL_TYPE_NSQUIT_TIMER,
-	/* tuntap character device */
-	EPOLL_TYPE_TAP_PASTA,
-	/* socket connected to qemu  */
-	EPOLL_TYPE_TAP_PASST,
-	/* socket listening for qemu socket connections */
-	EPOLL_TYPE_TAP_LISTEN,
-
-	EPOLL_NUM_TYPES,
-};
-
 /**
  * union epoll_ref - Breakdown of reference for epoll fd bookkeeping
  * @type:	Type of fd (tells us what to do with events)
diff --git a/tcp.c b/tcp.c
index 698e7ecb..a490920a 100644
--- a/tcp.c
+++ b/tcp.c
@@ -2467,7 +2467,7 @@ static int tcp_sock_init_af(const struct ctx *c, sa_family_t af, in_port_t port,
 	};
 	int s;
 
-	s = sock_l4(c, af, IPPROTO_TCP, addr, ifname, port, tref.u32);
+	s = sock_l4(c, af, EPOLL_TYPE_TCP_LISTEN, addr, ifname, port, tref.u32);
 
 	if (c->tcp.fwd_in.mode == FWD_AUTO) {
 		if (af == AF_INET  || af == AF_UNSPEC)
@@ -2531,8 +2531,8 @@ static void tcp_ns_sock_init4(const struct ctx *c, in_port_t port)
 
 	ASSERT(c->mode == MODE_PASTA);
 
-	s = sock_l4(c, AF_INET, IPPROTO_TCP, &in4addr_loopback, NULL, port,
-		    tref.u32);
+	s = sock_l4(c, AF_INET, EPOLL_TYPE_TCP_LISTEN, &in4addr_loopback,
+		    NULL, port, tref.u32);
 	if (s >= 0)
 		tcp_sock_set_bufsize(c, s);
 	else
@@ -2557,8 +2557,8 @@ static void tcp_ns_sock_init6(const struct ctx *c, in_port_t port)
 
 	ASSERT(c->mode == MODE_PASTA);
 
-	s = sock_l4(c, AF_INET6, IPPROTO_TCP, &in6addr_loopback, NULL, port,
-		    tref.u32);
+	s = sock_l4(c, AF_INET6, EPOLL_TYPE_TCP_LISTEN, &in6addr_loopback,
+		    NULL, port, tref.u32);
 	if (s >= 0)
 		tcp_sock_set_bufsize(c, s);
 	else
diff --git a/udp.c b/udp.c
index e089ef95..eadf4872 100644
--- a/udp.c
+++ b/udp.c
@@ -917,7 +917,7 @@ int udp_tap_handler(struct ctx *c, uint8_t pif,
 			if (!IN4_IS_ADDR_LOOPBACK(&s_in.sin_addr))
 				bind_addr = c->ip4.addr_out;
 
-			s = sock_l4(c, AF_INET, IPPROTO_UDP, &bind_addr,
+			s = sock_l4(c, AF_INET, EPOLL_TYPE_UDP, &bind_addr,
 				    bind_if, src, uref.u32);
 			if (s < 0)
 				return p->count - idx;
@@ -972,7 +972,7 @@ int udp_tap_handler(struct ctx *c, uint8_t pif,
 			    !IN6_IS_ADDR_LINKLOCAL(&s_in6.sin6_addr))
 				bind_addr = &c->ip6.addr_out;
 
-			s = sock_l4(c, AF_INET6, IPPROTO_UDP, bind_addr,
+			s = sock_l4(c, AF_INET6, EPOLL_TYPE_UDP, bind_addr,
 				    bind_if, src, uref.u32);
 			if (s < 0)
 				return p->count - idx;
@@ -1047,13 +1047,13 @@ int udp_sock_init(const struct ctx *c, int ns, sa_family_t af,
 		uref.v6 = 0;
 
 		if (!ns) {
-			r4 = s = sock_l4(c, AF_INET, IPPROTO_UDP, addr,
+			r4 = s = sock_l4(c, AF_INET, EPOLL_TYPE_UDP, addr,
 					 ifname, port, uref.u32);
 
 			udp_tap_map[V4][port].sock = s < 0 ? -1 : s;
 			udp_splice_init[V4][port].sock = s < 0 ? -1 : s;
 		} else {
-			r4 = s = sock_l4(c, AF_INET, IPPROTO_UDP,
+			r4 = s = sock_l4(c, AF_INET, EPOLL_TYPE_UDP,
 					 &in4addr_loopback,
 					 ifname, port, uref.u32);
 			udp_splice_ns[V4][port].sock = s < 0 ? -1 : s;
@@ -1064,13 +1064,13 @@ int udp_sock_init(const struct ctx *c, int ns, sa_family_t af,
 		uref.v6 = 1;
 
 		if (!ns) {
-			r6 = s = sock_l4(c, AF_INET6, IPPROTO_UDP, addr,
+			r6 = s = sock_l4(c, AF_INET6, EPOLL_TYPE_UDP, addr,
 					 ifname, port, uref.u32);
 
 			udp_tap_map[V6][port].sock = s < 0 ? -1 : s;
 			udp_splice_init[V6][port].sock = s < 0 ? -1 : s;
 		} else {
-			r6 = s = sock_l4(c, AF_INET6, IPPROTO_UDP,
+			r6 = s = sock_l4(c, AF_INET6, EPOLL_TYPE_UDP,
 					 &in6addr_loopback,
 					 ifname, port, uref.u32);
 			udp_splice_ns[V6][port].sock = s < 0 ? -1 : s;
diff --git a/util.c b/util.c
index dd2e57f6..9a73fbb9 100644
--- a/util.c
+++ b/util.c
@@ -35,7 +35,7 @@
 /**
  * sock_l4_sa() - Create and bind socket to socket address, add to epoll list
  * @c:		Execution context
- * @proto:	Protocol number
+ * @type:	epoll type
  * @sa:		Socket address to bind to
  * @sl:		Length of @sa
  * @ifname:	Interface for binding, NULL for any
@@ -44,34 +44,38 @@
  *
  * Return: newly created socket, negative error code on failure
  */
-static int sock_l4_sa(const struct ctx *c, uint8_t proto,
+static int sock_l4_sa(const struct ctx *c, enum epoll_type type,
 		      const void *sa, socklen_t sl,
 		      const char *ifname, bool v6only, uint32_t data)
 {
 	sa_family_t af = ((const struct sockaddr *)sa)->sa_family;
-	union epoll_ref ref = { .data = data };
+	union epoll_ref ref = { .type = type, .data = data };
 	struct epoll_event ev;
 	int fd, y = 1, ret;
+	uint8_t proto;
+	int socktype;
 
-	switch (proto) {
-	case IPPROTO_TCP:
-		ref.type = EPOLL_TYPE_TCP_LISTEN;
+	switch (type) {
+	case EPOLL_TYPE_TCP_LISTEN:
+		proto = IPPROTO_TCP;
+		socktype = SOCK_STREAM | SOCK_NONBLOCK;
 		break;
-	case IPPROTO_UDP:
-		ref.type = EPOLL_TYPE_UDP;
+	case EPOLL_TYPE_UDP:
+		proto = IPPROTO_UDP;
+		socktype = SOCK_DGRAM | SOCK_NONBLOCK;
 		break;
-	case IPPROTO_ICMP:
-	case IPPROTO_ICMPV6:
-		ref.type = EPOLL_TYPE_PING;
+	case EPOLL_TYPE_PING:
+		if (af == AF_INET)
+			proto = IPPROTO_ICMP;
+		else
+			proto = IPPROTO_ICMPV6;
+		socktype = SOCK_DGRAM | SOCK_NONBLOCK;
 		break;
 	default:
-		return -EPFNOSUPPORT;	/* Not implemented. */
+		ASSERT(0);
 	}
 
-	if (proto == IPPROTO_TCP)
-		fd = socket(af, SOCK_STREAM | SOCK_NONBLOCK, proto);
-	else
-		fd = socket(af, SOCK_DGRAM | SOCK_NONBLOCK, proto);
+	fd = socket(af, socktype, proto);
 
 	ret = -errno;
 	if (fd < 0) {
@@ -118,14 +122,14 @@ static int sock_l4_sa(const struct ctx *c, uint8_t proto,
 		 * this is fine. This might also fail for ICMP because of a
 		 * broken SELinux policy, see icmp_tap_handler().
 		 */
-		if (proto != IPPROTO_ICMP && proto != IPPROTO_ICMPV6) {
+		if (type != EPOLL_TYPE_PING) {
 			ret = -errno;
 			close(fd);
 			return ret;
 		}
 	}
 
-	if (proto == IPPROTO_TCP && listen(fd, 128) < 0) {
+	if (type == EPOLL_TYPE_TCP_LISTEN && listen(fd, 128) < 0) {
 		ret = -errno;
 		warn("TCP socket listen: %s", strerror(-ret));
 		close(fd);
@@ -146,7 +150,7 @@ static int sock_l4_sa(const struct ctx *c, uint8_t proto,
  * sock_l4() - Create and bind socket for given L4, add to epoll list
  * @c:		Execution context
  * @af:		Address family, AF_INET or AF_INET6
- * @proto:	Protocol number
+ * @type:	epoll type
  * @bind_addr:	Address for binding, NULL for any
  * @ifname:	Interface for binding, NULL for any
  * @port:	Port, host order
@@ -154,7 +158,7 @@ static int sock_l4_sa(const struct ctx *c, uint8_t proto,
  *
  * Return: newly created socket, negative error code on failure
  */
-int sock_l4(const struct ctx *c, sa_family_t af, uint8_t proto,
+int sock_l4(const struct ctx *c, sa_family_t af, enum epoll_type type,
 	    const void *bind_addr, const char *ifname, uint16_t port,
 	    uint32_t data)
 {
@@ -167,7 +171,7 @@ int sock_l4(const struct ctx *c, sa_family_t af, uint8_t proto,
 		};
 		if (bind_addr)
 			addr4.sin_addr = *(struct in_addr *)bind_addr;
-		return sock_l4_sa(c, proto, &addr4, sizeof(addr4), ifname,
+		return sock_l4_sa(c, type, &addr4, sizeof(addr4), ifname,
 				  false, data);
 	}
 
@@ -188,7 +192,7 @@ int sock_l4(const struct ctx *c, sa_family_t af, uint8_t proto,
 			    sizeof(c->ip6.addr_ll)))
 				addr6.sin6_scope_id = c->ifi6;
 		}
-		return sock_l4_sa(c, proto, &addr6, sizeof(addr6), ifname,
+		return sock_l4_sa(c, type, &addr6, sizeof(addr6), ifname,
 				  af == AF_INET6, data);
 	}
 	default:
diff --git a/util.h b/util.h
index eebb027b..d0150396 100644
--- a/util.h
+++ b/util.h
@@ -137,13 +137,14 @@ int do_clone(int (*fn)(void *), char *stack_area, size_t stack_size, int flags,
 #include <limits.h>
 #include <stdint.h>
 
+#include "epoll_type.h"
 #include "packet.h"
 
 struct ctx;
 
 /* cppcheck-suppress funcArgNamesDifferent */
 __attribute__ ((weak)) int ffsl(long int i) { return __builtin_ffsl(i); }
-int sock_l4(const struct ctx *c, sa_family_t af, uint8_t proto,
+int sock_l4(const struct ctx *c, sa_family_t af, enum epoll_type type,
 	    const void *bind_addr, const char *ifname, uint16_t port,
 	    uint32_t data);
 void sock_probe_mem(struct ctx *c);
-- 
2.45.2


  reply	other threads:[~2024-07-05 10:44 UTC|newest]

Thread overview: 15+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-07-05 10:43 [PATCH v2 00/11] Preliminaries for UDP flow support David Gibson
2024-07-05 10:43 ` David Gibson [this message]
2024-07-05 10:44 ` [PATCH v2 02/11] flow: Add flow_sidx_valid() helper David Gibson
2024-07-05 10:44 ` [PATCH v2 03/11] udp: Pass full epoll reference through more of sock handler path David Gibson
2024-07-05 10:44 ` [PATCH v2 04/11] udp: Rename IOV and mmsghdr arrays David Gibson
2024-07-05 10:44 ` [PATCH v2 05/11] udp: Unify udp[46]_mh_splice David Gibson
2024-07-05 10:44 ` [PATCH v2 06/11] udp: Unify udp[46]_l2_iov David Gibson
2024-07-05 10:44 ` [PATCH v2 07/11] udp: Don't repeatedly initialise udp[46]_eth_hdr David Gibson
2024-07-05 10:44 ` [PATCH v2 08/11] udp: Move some more of sock_handler tasks into sub-functions David Gibson
2024-07-05 10:44 ` [PATCH v2 09/11] udp: Consolidate datagram batching David Gibson
2024-07-05 10:44 ` [PATCH v2 10/11] doc: Add program to document and test assumptions about SO_REUSEADDR David Gibson
2024-07-12 11:42   ` David Taylor
2024-07-15  0:43     ` David Gibson
2024-07-05 10:44 ` [PATCH v2 11/11] doc: Test behaviour of zero length datagram recv()s David Gibson
2024-07-05 16:38 ` [PATCH v2 00/11] Preliminaries for UDP flow support Stefano Brivio

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20240705104409.3847002-2-david@gibson.dropbear.id.au \
    --to=david@gibson.dropbear.id.au \
    --cc=passt-dev@passt.top \
    --cc=sbrivio@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
Code repositories for project(s) associated with this public inbox

	https://passt.top/passt

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for IMAP folder(s).