From mboxrd@z Thu Jan 1 00:00:00 1970 Received: from mail.ozlabs.org (gandalf.ozlabs.org [150.107.74.76]) by passt.top (Postfix) with ESMTPS id 0C6585A004E for ; Fri, 05 Jul 2024 12:44:29 +0200 (CEST) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gibson.dropbear.id.au; s=202312; t=1720176251; bh=IUN5IUWGp6wCFGjJas38nImmHF60ngnAEc1fJy5s+SA=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=eiOiuubzxwAud8FouolzsNkTMcqNUo3A60IRMr457M9xkFWOZlN6XdCofzKFL+QNs /6srazOhHVuy9B4e9NsfkTaUogg7dOZyfPvm/XUS1PE2HyCFy1dPevBCmKtIQvm3AV swo/UoESzw1ueZHP9u1/y/KZYhB3fAz4/8CQh7TmqkiicYwl/f/G6RXHJt3TBHhEFx o5lMPw8eb4rZt6NZRRZQ7LTYQh+tgbPcN+/x79ROnTPJIavZxzxg+vp1Tlusrzgvvz IU3Xs98nwvs96b7k6PKG1KhcPWYubSbS9S99LODXpWaDHoo+meLsm9qOypJpWTO8ZY U5Y4Q57LFtqdg== Received: by gandalf.ozlabs.org (Postfix, from userid 1007) id 4WFqrW6XR1z4wxx; Fri, 5 Jul 2024 20:44:11 +1000 (AEST) From: David Gibson To: passt-dev@passt.top, Stefano Brivio Subject: [PATCH v2 01/11] util: sock_l4() determine protocol from epoll type rather than the reverse Date: Fri, 5 Jul 2024 20:43:59 +1000 Message-ID: <20240705104409.3847002-2-david@gibson.dropbear.id.au> X-Mailer: git-send-email 2.45.2 In-Reply-To: <20240705104409.3847002-1-david@gibson.dropbear.id.au> References: <20240705104409.3847002-1-david@gibson.dropbear.id.au> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit Message-ID-Hash: 5EUSDBDNB4IZQ3XCLMNO37OFFKGEW2D2 X-Message-ID-Hash: 5EUSDBDNB4IZQ3XCLMNO37OFFKGEW2D2 X-MailFrom: dgibson@gandalf.ozlabs.org X-Mailman-Rule-Misses: dmarc-mitigation; no-senders; approved; emergency; loop; banned-address; member-moderation; nonmember-moderation; administrivia; implicit-dest; max-recipients; max-size; news-moderation; no-subject; digests; suspicious-header CC: David Gibson X-Mailman-Version: 3.3.8 Precedence: list List-Id: Development discussion and patches for passt Archived-At: Archived-At: List-Archive: List-Archive: List-Help: List-Owner: List-Post: List-Subscribe: List-Unsubscribe: sock_l4() creates a socket of the given IP protocol number, and adds it to the epoll state. Currently it determines the correct tag for the epoll data based on the protocol. However, we have some future cases where we might want different semantics, and therefore epoll types, for sockets of the same protocol. So, change sock_l4() to take the epoll type as an explicit parameter, and determine the protocol from that. Signed-off-by: David Gibson --- epoll_type.h | 41 +++++++++++++++++++++++++++++++++++++++++ icmp.c | 2 +- passt.h | 32 -------------------------------- tcp.c | 10 +++++----- udp.c | 12 ++++++------ util.c | 48 ++++++++++++++++++++++++++---------------------- util.h | 3 ++- 7 files changed, 81 insertions(+), 67 deletions(-) create mode 100644 epoll_type.h diff --git a/epoll_type.h b/epoll_type.h new file mode 100644 index 00000000..b6c04199 --- /dev/null +++ b/epoll_type.h @@ -0,0 +1,41 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later + * Copyright Red Hat + * Author: David Gibson + */ + +#ifndef EPOLL_TYPE_H +#define EPOLL_TYPE_H + +/** + * enum epoll_type - Different types of fds we poll over + */ +enum epoll_type { + /* Special value to indicate an invalid type */ + EPOLL_TYPE_NONE = 0, + /* Connected TCP sockets */ + EPOLL_TYPE_TCP, + /* Connected TCP sockets (spliced) */ + EPOLL_TYPE_TCP_SPLICE, + /* Listening TCP sockets */ + EPOLL_TYPE_TCP_LISTEN, + /* timerfds used for TCP timers */ + EPOLL_TYPE_TCP_TIMER, + /* UDP sockets */ + EPOLL_TYPE_UDP, + /* ICMP/ICMPv6 ping sockets */ + EPOLL_TYPE_PING, + /* inotify fd watching for end of netns (pasta) */ + EPOLL_TYPE_NSQUIT_INOTIFY, + /* timer fd watching for end of netns, fallback for inotify (pasta) */ + EPOLL_TYPE_NSQUIT_TIMER, + /* tuntap character device */ + EPOLL_TYPE_TAP_PASTA, + /* socket connected to qemu */ + EPOLL_TYPE_TAP_PASST, + /* socket listening for qemu socket connections */ + EPOLL_TYPE_TAP_LISTEN, + + EPOLL_NUM_TYPES, +}; + +#endif /* EPOLL_TYPE_H */ diff --git a/icmp.c b/icmp.c index 80330f6f..d4ccc722 100644 --- a/icmp.c +++ b/icmp.c @@ -179,7 +179,7 @@ static struct icmp_ping_flow *icmp_ping_new(const struct ctx *c, } ref.flowside = FLOW_SIDX(flow, TGTSIDE); - pingf->sock = sock_l4(c, af, flow_proto[flowtype], bind_addr, bind_if, + pingf->sock = sock_l4(c, af, EPOLL_TYPE_PING, bind_addr, bind_if, 0, ref.data); if (pingf->sock < 0) { diff --git a/passt.h b/passt.h index 21cf4c15..867e77b7 100644 --- a/passt.h +++ b/passt.h @@ -23,38 +23,6 @@ union epoll_ref; #include "tcp.h" #include "udp.h" -/** - * enum epoll_type - Different types of fds we poll over - */ -enum epoll_type { - /* Special value to indicate an invalid type */ - EPOLL_TYPE_NONE = 0, - /* Connected TCP sockets */ - EPOLL_TYPE_TCP, - /* Connected TCP sockets (spliced) */ - EPOLL_TYPE_TCP_SPLICE, - /* Listening TCP sockets */ - EPOLL_TYPE_TCP_LISTEN, - /* timerfds used for TCP timers */ - EPOLL_TYPE_TCP_TIMER, - /* UDP sockets */ - EPOLL_TYPE_UDP, - /* ICMP/ICMPv6 ping sockets */ - EPOLL_TYPE_PING, - /* inotify fd watching for end of netns (pasta) */ - EPOLL_TYPE_NSQUIT_INOTIFY, - /* timer fd watching for end of netns, fallback for inotify (pasta) */ - EPOLL_TYPE_NSQUIT_TIMER, - /* tuntap character device */ - EPOLL_TYPE_TAP_PASTA, - /* socket connected to qemu */ - EPOLL_TYPE_TAP_PASST, - /* socket listening for qemu socket connections */ - EPOLL_TYPE_TAP_LISTEN, - - EPOLL_NUM_TYPES, -}; - /** * union epoll_ref - Breakdown of reference for epoll fd bookkeeping * @type: Type of fd (tells us what to do with events) diff --git a/tcp.c b/tcp.c index 698e7ecb..a490920a 100644 --- a/tcp.c +++ b/tcp.c @@ -2467,7 +2467,7 @@ static int tcp_sock_init_af(const struct ctx *c, sa_family_t af, in_port_t port, }; int s; - s = sock_l4(c, af, IPPROTO_TCP, addr, ifname, port, tref.u32); + s = sock_l4(c, af, EPOLL_TYPE_TCP_LISTEN, addr, ifname, port, tref.u32); if (c->tcp.fwd_in.mode == FWD_AUTO) { if (af == AF_INET || af == AF_UNSPEC) @@ -2531,8 +2531,8 @@ static void tcp_ns_sock_init4(const struct ctx *c, in_port_t port) ASSERT(c->mode == MODE_PASTA); - s = sock_l4(c, AF_INET, IPPROTO_TCP, &in4addr_loopback, NULL, port, - tref.u32); + s = sock_l4(c, AF_INET, EPOLL_TYPE_TCP_LISTEN, &in4addr_loopback, + NULL, port, tref.u32); if (s >= 0) tcp_sock_set_bufsize(c, s); else @@ -2557,8 +2557,8 @@ static void tcp_ns_sock_init6(const struct ctx *c, in_port_t port) ASSERT(c->mode == MODE_PASTA); - s = sock_l4(c, AF_INET6, IPPROTO_TCP, &in6addr_loopback, NULL, port, - tref.u32); + s = sock_l4(c, AF_INET6, EPOLL_TYPE_TCP_LISTEN, &in6addr_loopback, + NULL, port, tref.u32); if (s >= 0) tcp_sock_set_bufsize(c, s); else diff --git a/udp.c b/udp.c index e089ef95..eadf4872 100644 --- a/udp.c +++ b/udp.c @@ -917,7 +917,7 @@ int udp_tap_handler(struct ctx *c, uint8_t pif, if (!IN4_IS_ADDR_LOOPBACK(&s_in.sin_addr)) bind_addr = c->ip4.addr_out; - s = sock_l4(c, AF_INET, IPPROTO_UDP, &bind_addr, + s = sock_l4(c, AF_INET, EPOLL_TYPE_UDP, &bind_addr, bind_if, src, uref.u32); if (s < 0) return p->count - idx; @@ -972,7 +972,7 @@ int udp_tap_handler(struct ctx *c, uint8_t pif, !IN6_IS_ADDR_LINKLOCAL(&s_in6.sin6_addr)) bind_addr = &c->ip6.addr_out; - s = sock_l4(c, AF_INET6, IPPROTO_UDP, bind_addr, + s = sock_l4(c, AF_INET6, EPOLL_TYPE_UDP, bind_addr, bind_if, src, uref.u32); if (s < 0) return p->count - idx; @@ -1047,13 +1047,13 @@ int udp_sock_init(const struct ctx *c, int ns, sa_family_t af, uref.v6 = 0; if (!ns) { - r4 = s = sock_l4(c, AF_INET, IPPROTO_UDP, addr, + r4 = s = sock_l4(c, AF_INET, EPOLL_TYPE_UDP, addr, ifname, port, uref.u32); udp_tap_map[V4][port].sock = s < 0 ? -1 : s; udp_splice_init[V4][port].sock = s < 0 ? -1 : s; } else { - r4 = s = sock_l4(c, AF_INET, IPPROTO_UDP, + r4 = s = sock_l4(c, AF_INET, EPOLL_TYPE_UDP, &in4addr_loopback, ifname, port, uref.u32); udp_splice_ns[V4][port].sock = s < 0 ? -1 : s; @@ -1064,13 +1064,13 @@ int udp_sock_init(const struct ctx *c, int ns, sa_family_t af, uref.v6 = 1; if (!ns) { - r6 = s = sock_l4(c, AF_INET6, IPPROTO_UDP, addr, + r6 = s = sock_l4(c, AF_INET6, EPOLL_TYPE_UDP, addr, ifname, port, uref.u32); udp_tap_map[V6][port].sock = s < 0 ? -1 : s; udp_splice_init[V6][port].sock = s < 0 ? -1 : s; } else { - r6 = s = sock_l4(c, AF_INET6, IPPROTO_UDP, + r6 = s = sock_l4(c, AF_INET6, EPOLL_TYPE_UDP, &in6addr_loopback, ifname, port, uref.u32); udp_splice_ns[V6][port].sock = s < 0 ? -1 : s; diff --git a/util.c b/util.c index dd2e57f6..9a73fbb9 100644 --- a/util.c +++ b/util.c @@ -35,7 +35,7 @@ /** * sock_l4_sa() - Create and bind socket to socket address, add to epoll list * @c: Execution context - * @proto: Protocol number + * @type: epoll type * @sa: Socket address to bind to * @sl: Length of @sa * @ifname: Interface for binding, NULL for any @@ -44,34 +44,38 @@ * * Return: newly created socket, negative error code on failure */ -static int sock_l4_sa(const struct ctx *c, uint8_t proto, +static int sock_l4_sa(const struct ctx *c, enum epoll_type type, const void *sa, socklen_t sl, const char *ifname, bool v6only, uint32_t data) { sa_family_t af = ((const struct sockaddr *)sa)->sa_family; - union epoll_ref ref = { .data = data }; + union epoll_ref ref = { .type = type, .data = data }; struct epoll_event ev; int fd, y = 1, ret; + uint8_t proto; + int socktype; - switch (proto) { - case IPPROTO_TCP: - ref.type = EPOLL_TYPE_TCP_LISTEN; + switch (type) { + case EPOLL_TYPE_TCP_LISTEN: + proto = IPPROTO_TCP; + socktype = SOCK_STREAM | SOCK_NONBLOCK; break; - case IPPROTO_UDP: - ref.type = EPOLL_TYPE_UDP; + case EPOLL_TYPE_UDP: + proto = IPPROTO_UDP; + socktype = SOCK_DGRAM | SOCK_NONBLOCK; break; - case IPPROTO_ICMP: - case IPPROTO_ICMPV6: - ref.type = EPOLL_TYPE_PING; + case EPOLL_TYPE_PING: + if (af == AF_INET) + proto = IPPROTO_ICMP; + else + proto = IPPROTO_ICMPV6; + socktype = SOCK_DGRAM | SOCK_NONBLOCK; break; default: - return -EPFNOSUPPORT; /* Not implemented. */ + ASSERT(0); } - if (proto == IPPROTO_TCP) - fd = socket(af, SOCK_STREAM | SOCK_NONBLOCK, proto); - else - fd = socket(af, SOCK_DGRAM | SOCK_NONBLOCK, proto); + fd = socket(af, socktype, proto); ret = -errno; if (fd < 0) { @@ -118,14 +122,14 @@ static int sock_l4_sa(const struct ctx *c, uint8_t proto, * this is fine. This might also fail for ICMP because of a * broken SELinux policy, see icmp_tap_handler(). */ - if (proto != IPPROTO_ICMP && proto != IPPROTO_ICMPV6) { + if (type != EPOLL_TYPE_PING) { ret = -errno; close(fd); return ret; } } - if (proto == IPPROTO_TCP && listen(fd, 128) < 0) { + if (type == EPOLL_TYPE_TCP_LISTEN && listen(fd, 128) < 0) { ret = -errno; warn("TCP socket listen: %s", strerror(-ret)); close(fd); @@ -146,7 +150,7 @@ static int sock_l4_sa(const struct ctx *c, uint8_t proto, * sock_l4() - Create and bind socket for given L4, add to epoll list * @c: Execution context * @af: Address family, AF_INET or AF_INET6 - * @proto: Protocol number + * @type: epoll type * @bind_addr: Address for binding, NULL for any * @ifname: Interface for binding, NULL for any * @port: Port, host order @@ -154,7 +158,7 @@ static int sock_l4_sa(const struct ctx *c, uint8_t proto, * * Return: newly created socket, negative error code on failure */ -int sock_l4(const struct ctx *c, sa_family_t af, uint8_t proto, +int sock_l4(const struct ctx *c, sa_family_t af, enum epoll_type type, const void *bind_addr, const char *ifname, uint16_t port, uint32_t data) { @@ -167,7 +171,7 @@ int sock_l4(const struct ctx *c, sa_family_t af, uint8_t proto, }; if (bind_addr) addr4.sin_addr = *(struct in_addr *)bind_addr; - return sock_l4_sa(c, proto, &addr4, sizeof(addr4), ifname, + return sock_l4_sa(c, type, &addr4, sizeof(addr4), ifname, false, data); } @@ -188,7 +192,7 @@ int sock_l4(const struct ctx *c, sa_family_t af, uint8_t proto, sizeof(c->ip6.addr_ll))) addr6.sin6_scope_id = c->ifi6; } - return sock_l4_sa(c, proto, &addr6, sizeof(addr6), ifname, + return sock_l4_sa(c, type, &addr6, sizeof(addr6), ifname, af == AF_INET6, data); } default: diff --git a/util.h b/util.h index eebb027b..d0150396 100644 --- a/util.h +++ b/util.h @@ -137,13 +137,14 @@ int do_clone(int (*fn)(void *), char *stack_area, size_t stack_size, int flags, #include #include +#include "epoll_type.h" #include "packet.h" struct ctx; /* cppcheck-suppress funcArgNamesDifferent */ __attribute__ ((weak)) int ffsl(long int i) { return __builtin_ffsl(i); } -int sock_l4(const struct ctx *c, sa_family_t af, uint8_t proto, +int sock_l4(const struct ctx *c, sa_family_t af, enum epoll_type type, const void *bind_addr, const char *ifname, uint16_t port, uint32_t data); void sock_probe_mem(struct ctx *c); -- 2.45.2