From mboxrd@z Thu Jan 1 00:00:00 1970 Received: by passt.top (Postfix, from userid 1000) id 6F3A15A026D; Sun, 22 Mar 2026 15:18:43 +0100 (CET) From: Stefano Brivio To: passt-dev@passt.top Subject: [PATCH 17/18] conf, fwd, ports, util: Move things around for pesto Date: Sun, 22 Mar 2026 15:18:42 +0100 Message-ID: <20260322141843.4095972-2-sbrivio@redhat.com> X-Mailer: git-send-email 2.43.0 In-Reply-To: <20260319061157.1983818-1-david@gibson.dropbear.id.au> References: <20260319061157.1983818-1-david@gibson.dropbear.id.au> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit Message-ID-Hash: KVR22J4QIUPT2I6AYROAYWMAYNQF3JUX X-Message-ID-Hash: KVR22J4QIUPT2I6AYROAYWMAYNQF3JUX X-MailFrom: sbrivio@passt.top X-Mailman-Rule-Misses: dmarc-mitigation; no-senders; approved; emergency; loop; banned-address; member-moderation; nonmember-moderation; administrivia; implicit-dest; max-recipients; max-size; news-moderation; no-subject; digests; suspicious-header CC: David Gibson X-Mailman-Version: 3.3.8 Precedence: list List-Id: Development discussion and patches for passt Archived-At: Archived-At: List-Archive: List-Archive: List-Help: List-Owner: List-Post: List-Subscribe: List-Unsubscribe: ...so that pesto can reuse functions parsing port forwarding specifications from ports.c: - checks on validity of some forwarding options (auto with pasta only, all with passt only) move to the caller, conf() - some other checks (availability of IPv4, IPv6, SO_BINDTODEVICE) are now based on specific parameters passed to conf_ports() and conf_ports_range_except(), so that we don't need struct ctx there - bitmap operations and some convenience macros move to common.h - fwd_probe_ephemeral(), fwd_port_is_ephemeral(), and fwd_rule_add() move to ports.c and fwd_rule.c, without any renaming for the moment - forwarding table definitions move to fwd_rule.h - selection of the definition of some logging functions now depends on a gross but unintrusive hack, a PESTO define (this will need a cleaner
solution later) Signed-off-by: Stefano Brivio --- Makefile | 8 ++- common.h | 99 ++++++++++++++++++++++++++++ conf.c | 54 +++++++++++----- fwd.c | 160 --------------------------------------------- fwd.h | 54 +--------------- fwd_rule.c | 89 ++++++++++++++++++++++++- fwd_rule.h | 53 +++++++++++++++ lineread.c | 1 - log.h | 33 ++++++++++ pesto.c | 21 ++---- ports.c | 186 ++++++++++++++++++++++++++++++++++++----------------- ports.h | 29 ++++++--- util.c | 84 ------------------------ util.h | 18 ------ 14 files changed, 470 insertions(+), 419 deletions(-) diff --git a/Makefile b/Makefile index 47d4c95..b6b6a82 100644 --- a/Makefile +++ b/Makefile @@ -45,12 +45,13 @@ PASST_SRCS = arch.c arp.c checksum.c conf.c dhcp.c dhcpv6.c epoll_ctl.c \ vhost_user.c virtio.c vu_common.c QRAP_SRCS = qrap.c PASST_REPAIR_SRCS = passt-repair.c -PESTO_SRCS = pesto.c fwd_rule.c inany.c ip.c serialise.c +PESTO_SRCS = pesto.c fwd_rule.c inany.c ip.c serialise.c ports.c lineread.c SRCS = $(PASST_SRCS) $(QRAP_SRCS) $(PASST_REPAIR_SRCS) $(PESTO_SRCS) MANPAGES = passt.1 pasta.1 pesto.1 qrap.1 passt-repair.1 -PESTO_HEADERS = common.h fwd_rule.h inany.h ip.h pesto.h serialise.h +PESTO_HEADERS = common.h fwd_rule.h inany.h ip.h pesto.h serialise.h ports.h \ + lineread.h PASST_HEADERS = arch.h arp.h checksum.h conf.h dhcp.h dhcpv6.h epoll_ctl.h \ flow.h fwd.h flow_table.h icmp.h icmp_flow.h iov.h isolation.h \ lineread.h log.h migrate.h ndp.h netlink.h packet.h passt.h pasta.h \ @@ -117,7 +118,8 @@ passt-repair: $(PASST_REPAIR_SRCS) seccomp_repair.h $(CC) $(FLAGS) $(CFLAGS) $(CPPFLAGS) $(PASST_REPAIR_SRCS) -o passt-repair $(LDFLAGS) pesto: $(PESTO_SRCS) $(PESTO_HEADERS) seccomp_pesto.h - $(CC) $(FLAGS) $(CFLAGS) $(CPPFLAGS) $(PESTO_SRCS) -o pesto $(LDFLAGS) + $(CC) $(FLAGS) $(CFLAGS) $(CPPFLAGS) $(PESTO_SRCS) -DPESTO -o pesto \ + $(LDFLAGS) valgrind: EXTRA_SYSCALLS += rt_sigprocmask rt_sigtimedwait rt_sigaction \ rt_sigreturn getpid gettid kill clock_gettime \ diff --git a/common.h b/common.h index 
7d37b92..43b1455 100644 --- a/common.h +++ b/common.h @@ -8,6 +8,9 @@ #ifndef COMMON_H #define COMMON_H +#include +#include + #define VERSION_BLOB \ VERSION "\n" \ "Copyright Red Hat\n" \ @@ -19,6 +22,8 @@ /* FPRINTF() intentionally silences cert-err33-c clang-tidy warnings */ #define FPRINTF(f, ...) (void)fprintf(f, __VA_ARGS__) +#define ARRAY_SIZE(a) ((int)(sizeof(a) / sizeof((a)[0]))) + #ifndef MIN #define MIN(x, y) (((x) < (y)) ? (x) : (y)) #endif @@ -26,7 +31,13 @@ #define MAX(x, y) (((x) > (y)) ? (x) : (y)) #endif +#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d)) +#define DIV_ROUND_CLOSEST(n, d) (((n) + (d) / 2) / (d)) +#define ROUND_DOWN(x, y) ((x) & ~((y) - 1)) +#define ROUND_UP(x, y) (((x) + (y) - 1) & ~((y) - 1)) + #define BIT(n) (1UL << (n)) +#define MAX_FROM_BITS(n) (((1U << (n)) - 1)) #ifndef __bswap_constant_16 #define __bswap_constant_16(x) \ @@ -76,4 +87,92 @@ #define ntohll(x) (be64toh((x))) #define htonll(x) (htobe64((x))) +#define BITMAP_BIT(n) (BIT((n) % (sizeof(long) * 8))) +#define BITMAP_WORD(n) (n / (sizeof(long) * 8)) + +/** + * bitmap_set() - Set single bit in bitmap + * @map: Pointer to bitmap + * @bit: Bit number to set + */ +static inline void bitmap_set(uint8_t *map, unsigned bit) +{ + unsigned long *word = (unsigned long *)map + BITMAP_WORD(bit); + + *word |= BITMAP_BIT(bit); +} + +/** + * bitmap_clear() - Clear single bit in bitmap + * @map: Pointer to bitmap + * @bit: Bit number to clear + */ +/* cppcheck-suppress unusedFunction */ +static inline void bitmap_clear(uint8_t *map, unsigned bit) +{ + unsigned long *word = (unsigned long *)map + BITMAP_WORD(bit); + + *word &= ~BITMAP_BIT(bit); +} + +/** + * bitmap_isset() - Check for set bit in bitmap + * @map: Pointer to bitmap + * @bit: Bit number to check + * + * Return: true if given bit is set, false if it's not + */ +static inline bool bitmap_isset(const uint8_t *map, unsigned bit) +{ + const unsigned long *word + = (const unsigned long *)map + BITMAP_WORD(bit); + + return 
!!(*word & BITMAP_BIT(bit)); +} + +/** + * bitmap_or() - Logical disjunction (OR) of two bitmaps + * @dst: Pointer to result bitmap + * @size: Size of bitmaps, in bytes + * @a: First operand + * @b: Second operand + */ +/* cppcheck-suppress unusedFunction */ +static inline void bitmap_or(uint8_t *dst, size_t size, + const uint8_t *a, const uint8_t *b) +{ + unsigned long *dw = (unsigned long *)dst; + unsigned long *aw = (unsigned long *)a; + unsigned long *bw = (unsigned long *)b; + size_t i; + + for (i = 0; i < size / sizeof(long); i++, dw++, aw++, bw++) + *dw = *aw | *bw; + + for (i = size / sizeof(long) * sizeof(long); i < size; i++) + dst[i] = a[i] | b[i]; +} + +/** + * bitmap_and_not() - Logical conjunction with complement (AND NOT) of bitmap + * @dst: Pointer to result bitmap + * @size: Size of bitmaps, in bytes + * @a: First operand + * @b: Second operand + */ +static inline void bitmap_and_not(uint8_t *dst, size_t size, + const uint8_t *a, const uint8_t *b) +{ + unsigned long *dw = (unsigned long *)dst; + unsigned long *aw = (unsigned long *)a; + unsigned long *bw = (unsigned long *)b; + size_t i; + + for (i = 0; i < size / sizeof(long); i++, dw++, aw++, bw++) + *dw = *aw & ~*bw; + + for (i = size / sizeof(long) * sizeof(long); i < size; i++) + dst[i] = a[i] & ~b[i]; +} + #endif /* _COMMON_H */ diff --git a/conf.c b/conf.c index de4c3c6..8e54b85 100644 --- a/conf.c +++ b/conf.c @@ -1846,18 +1846,36 @@ void conf(struct ctx *c, int argc, char **argv) do { name = getopt_long(argc, argv, optstring, options, NULL); + if (name != 't' && name != 'T' && name != 'u' && name != 'U') + continue; + + if ((name == 't' || name == 'T') && c->no_tcp) + die("TCP forwarding requested but TCP is disabled"); + if ((name == 'u' || name == 'U') && c->no_udp) + die("UDP forwarding requested but UDP is disabled"); + + if (!strcmp(optarg, "auto") && c->mode != MODE_PASTA) + die("'auto' port forwarding is only allowed for pasta"); + + if (!strcmp(optarg, "all") && c->mode == 
MODE_PASTA) + die("'all' port forwarding is only allowed for passt"); + if (name == 't') { - conf_ports(c, name, optarg, c->fwd[PIF_HOST], - &tcp_in_mode); + conf_ports(name, optarg, c->fwd[PIF_HOST], + &tcp_in_mode, !c->no_bindtodevice, + !!c->ifi4, !!c->ifi6); } else if (name == 'u') { - conf_ports(c, name, optarg, c->fwd[PIF_HOST], - &udp_in_mode); + conf_ports(name, optarg, c->fwd[PIF_HOST], + &udp_in_mode, !c->no_bindtodevice, + !!c->ifi4, !!c->ifi6); } else if (name == 'T') { - conf_ports(c, name, optarg, c->fwd[PIF_SPLICE], - &tcp_out_mode); + conf_ports(name, optarg, c->fwd[PIF_SPLICE], + &tcp_out_mode, !c->no_bindtodevice, + !!c->ifi4, !!c->ifi6); } else if (name == 'U') { - conf_ports(c, name, optarg, c->fwd[PIF_SPLICE], - &udp_out_mode); + conf_ports(name, optarg, c->fwd[PIF_SPLICE], + &udp_out_mode, !c->no_bindtodevice, + !!c->ifi4, !!c->ifi6); } } while (name != -1); @@ -1917,24 +1935,28 @@ void conf(struct ctx *c, int argc, char **argv) udp_out_mode = fwd_default; if (tcp_in_mode == FWD_MODE_AUTO) { - conf_ports_range_except(c, 't', "auto", c->fwd[PIF_HOST], + conf_ports_range_except('t', "auto", c->fwd[PIF_HOST], NULL, NULL, 1, NUM_PORTS - 1, NULL, 1, - FWD_SCAN); + FWD_SCAN, !c->no_bindtodevice, + !!c->ifi4, !!c->ifi6); } if (tcp_out_mode == FWD_MODE_AUTO) { - conf_ports_range_except(c, 'T', "auto", c->fwd[PIF_SPLICE], + conf_ports_range_except('T', "auto", c->fwd[PIF_SPLICE], NULL, "lo", 1, NUM_PORTS - 1, NULL, 1, - FWD_SCAN); + FWD_SCAN, !c->no_bindtodevice, + !!c->ifi4, !!c->ifi6); } if (udp_in_mode == FWD_MODE_AUTO) { - conf_ports_range_except(c, 'u', "auto", c->fwd[PIF_HOST], + conf_ports_range_except('u', "auto", c->fwd[PIF_HOST], NULL, NULL, 1, NUM_PORTS - 1, NULL, 1, - FWD_SCAN); + FWD_SCAN, !c->no_bindtodevice, + !!c->ifi4, !!c->ifi6); } if (udp_out_mode == FWD_MODE_AUTO) { - conf_ports_range_except(c, 'U', "auto", c->fwd[PIF_SPLICE], + conf_ports_range_except('U', "auto", c->fwd[PIF_SPLICE], NULL, "lo", 1, NUM_PORTS - 1, NULL, 1, - 
FWD_SCAN); + FWD_SCAN, !c->no_bindtodevice, + !!c->ifi4, !!c->ifi6); } conf_sock_listen(c); diff --git a/fwd.c b/fwd.c index 72028e7..4592af2 100644 --- a/fwd.c +++ b/fwd.c @@ -34,12 +34,6 @@ #include "arp.h" #include "ndp.h" -/* Ephemeral port range: values from RFC 6335 */ -static in_port_t fwd_ephemeral_min = (1 << 15) + (1 << 14); -static in_port_t fwd_ephemeral_max = NUM_PORTS - 1; - -#define PORT_RANGE_SYSCTL "/proc/sys/net/ipv4/ip_local_port_range" - #define NEIGH_TABLE_SLOTS 1024 #define NEIGH_TABLE_SIZE (NEIGH_TABLE_SLOTS / 2) static_assert((NEIGH_TABLE_SLOTS & (NEIGH_TABLE_SLOTS - 1)) == 0, @@ -249,74 +243,6 @@ void fwd_neigh_table_init(const struct ctx *c) fwd_neigh_table_update(c, &mga, c->our_tap_mac, true); } -/** fwd_probe_ephemeral() - Determine what ports this host considers ephemeral - * - * Work out what ports the host thinks are emphemeral and record it for later - * use by fwd_port_is_ephemeral(). If we're unable to probe, assume the range - * recommended by RFC 6335. - */ -void fwd_probe_ephemeral(void) -{ - char *line, *tab, *end; - struct lineread lr; - long min, max; - ssize_t len; - int fd; - - fd = open(PORT_RANGE_SYSCTL, O_RDONLY | O_CLOEXEC); - if (fd < 0) { - warn_perror("Unable to open %s", PORT_RANGE_SYSCTL); - return; - } - - lineread_init(&lr, fd); - len = lineread_get(&lr, &line); - close(fd); - - if (len < 0) - goto parse_err; - - tab = strchr(line, '\t'); - if (!tab) - goto parse_err; - *tab = '\0'; - - errno = 0; - min = strtol(line, &end, 10); - if (*end || errno) - goto parse_err; - - errno = 0; - max = strtol(tab + 1, &end, 10); - if (*end || errno) - goto parse_err; - - if (min < 0 || min >= (long)NUM_PORTS || - max < 0 || max >= (long)NUM_PORTS) - goto parse_err; - - fwd_ephemeral_min = min; - fwd_ephemeral_max = max; - - return; - -parse_err: - warn("Unable to parse %s", PORT_RANGE_SYSCTL); -} - -/** - * fwd_port_is_ephemeral() - Is port number ephemeral? 
- * @port: Port number - * - * Return: true if @port is ephemeral, that is may be allocated by the kernel as - * a local port for outgoing connections or datagrams, but should not be - * used for binding services to. - */ -bool fwd_port_is_ephemeral(in_port_t port) -{ - return (port >= fwd_ephemeral_min) && (port <= fwd_ephemeral_max); -} - /* Forwarding table storage, generally accessed via pointers in struct ctx */ static struct fwd_table fwd_in; static struct fwd_table fwd_out; @@ -332,92 +258,6 @@ void fwd_rule_init(struct ctx *c) c->fwd[PIF_SPLICE] = &fwd_out; } -/** - * fwd_rule_add() - Add a rule to a forwarding table - * @fwd: Table to add to - * @proto: Protocol to forward - * @flags: Flags for this entry - * @addr: Our address to forward (NULL for both 0.0.0.0 and ::) - * @ifname: Only forward from this interface name, if non-empty - * @first: First port number to forward - * @last: Last port number to forward - * @to: First port of target port range to map to - */ -void fwd_rule_add(struct fwd_table *fwd, uint8_t proto, uint8_t flags, - const union inany_addr *addr, const char *ifname, - in_port_t first, in_port_t last, in_port_t to) -{ - /* Flags which can be set from the caller */ - const uint8_t allowed_flags = FWD_WEAK | FWD_SCAN; - unsigned num = (unsigned)last - first + 1; - struct fwd_rule_state *new; - unsigned i, port; - - assert(!(flags & ~allowed_flags)); - - if (fwd->count >= ARRAY_SIZE(fwd->rules)) - die("Too many port forwarding ranges"); - if ((fwd->sock_count + num) > ARRAY_SIZE(fwd->socks)) - die("Too many listening sockets"); - - /* Check for any conflicting entries */ - for (i = 0; i < fwd->count; i++) { - char newstr[INANY_ADDRSTRLEN], rulestr[INANY_ADDRSTRLEN]; - struct fwd_rule_state *rule = &fwd->rules[i]; - - if (proto != rule->rule.proto) - /* Non-conflicting protocols */ - continue; - - if (!inany_matches(addr, fwd_rule_addr(&rule->rule))) - /* Non-conflicting addresses */ - continue; - - if (last < rule->rule.first || 
rule->rule.last < first) - /* Port ranges don't overlap */ - continue; - - die("Forwarding configuration conflict: %s/%u-%u versus %s/%u-%u", - inany_ntop(addr, newstr, sizeof(newstr)), first, last, - inany_ntop(fwd_rule_addr(&rule->rule), - rulestr, sizeof(rulestr)), - rule->rule.first, rule->rule.last); - } - - new = &fwd->rules[fwd->count++]; - new->rule.proto = proto; - new->rule.flags = flags; - - if (addr) { - new->rule.addr = *addr; - } else { - new->rule.addr = inany_any6; - new->rule.flags |= FWD_DUAL_STACK_ANY; - } - - memset(new->rule.ifname, 0, sizeof(new->rule.ifname)); - if (ifname) { - int ret; - - ret = snprintf(new->rule.ifname, sizeof(new->rule.ifname), - "%s", ifname); - if (ret <= 0 || (size_t)ret >= sizeof(new->rule.ifname)) - die("Invalid interface name: %s", ifname); - } - - assert(first <= last); - new->rule.first = first; - new->rule.last = last; - - new->rule.to = to; - - new->socks = &fwd->socks[fwd->sock_count]; - fwd->sock_count += num; - - for (port = new->rule.first; port <= new->rule.last; port++) - new->socks[port - new->rule.first] = -1; -} - /** * fwd_rule_match() - Does a prospective flow match a given forwarding rule? 
* @rule: Forwarding rule diff --git a/fwd.h b/fwd.h index 00f96860..00450a4 100644 --- a/fwd.h +++ b/fwd.h @@ -15,66 +15,14 @@ #include #include "inany.h" +#include "ports.h" #include "fwd_rule.h" struct flowside; -/* Number of ports for both TCP and UDP */ -#define NUM_PORTS (1U << 16) - void fwd_probe_ephemeral(void); bool fwd_port_is_ephemeral(in_port_t port); -/** - * struct fwd_rule_state - Forwarding rule and associated state - * @rule: Rule specification - * @socks: Array of listening sockets for this entry - */ -struct fwd_rule_state { - struct fwd_rule rule; - int *socks; -}; - -#define FWD_RULE_BITS 8 -#define MAX_FWD_RULES MAX_FROM_BITS(FWD_RULE_BITS) -#define FWD_NO_HINT (-1) - -/** - * struct fwd_listen_ref - information about a single listening socket - * @port: Bound port number of the socket - * @pif: pif in which the socket is listening - * @rule: Index of forwarding rule - */ -struct fwd_listen_ref { - in_port_t port; - uint8_t pif; - unsigned rule :FWD_RULE_BITS; -}; - -/* Maximum number of listening sockets (per pif) - * - * Rationale: This lets us listen on every port for two addresses and two - * protocols (which we need for -T auto -U auto without SO_BINDTODEVICE), plus a - * comfortable number of extras. 
- */ -#define MAX_LISTEN_SOCKS (NUM_PORTS * 5) - -/** - * struct fwd_table - Table of forwarding rules (per initiating pif) - * @count: Number of forwarding rules - * @rules: Array of forwarding rules - * @sock_count: Number of entries used in @socks - * @socks: Listening sockets for forwarding - */ -struct fwd_table { - unsigned count; - struct fwd_rule_state rules[MAX_FWD_RULES]; - unsigned sock_count; - int socks[MAX_LISTEN_SOCKS]; -}; - -#define PORT_BITMAP_SIZE DIV_ROUND_UP(NUM_PORTS, 8) - /** * struct fwd_scan - Port scanning state for a protocol+direction * @scan4: /proc/net fd to scan for IPv4 ports when in AUTO mode diff --git a/fwd_rule.c b/fwd_rule.c index 3e39b4f..091e196 100644 --- a/fwd_rule.c +++ b/fwd_rule.c @@ -18,8 +18,9 @@ #include #include "serialise.h" - +#include "ports.h" #include "fwd_rule.h" +#include "log.h" /** * fwd_rule_addr() - Return match address for a rule @@ -74,6 +75,92 @@ const char *fwd_rule_ntop(const struct fwd_rule *rule, char *dst, size_t size) return dst; } +/** + * fwd_rule_add() - Add a rule to a forwarding table + * @fwd: Table to add to + * @proto: Protocol to forward + * @flags: Flags for this entry + * @addr: Our address to forward (NULL for both 0.0.0.0 and ::) + * @ifname: Only forward from this interface name, if non-empty + * @first: First port number to forward + * @last: Last port number to forward + * @to: First port of target port range to map to + */ +void fwd_rule_add(struct fwd_table *fwd, uint8_t proto, uint8_t flags, + const union inany_addr *addr, const char *ifname, + in_port_t first, in_port_t last, in_port_t to) +{ + /* Flags which can be set from the caller */ + const uint8_t allowed_flags = FWD_WEAK | FWD_SCAN; + unsigned num = (unsigned)last - first + 1; + struct fwd_rule_state *new; + unsigned i, port; + + assert(!(flags & ~allowed_flags)); + + if (fwd->count >= ARRAY_SIZE(fwd->rules)) + die("Too many port forwarding ranges"); + if ((fwd->sock_count + num) > ARRAY_SIZE(fwd->socks)) + die("Too many 
listening sockets"); + + /* Check for any conflicting entries */ + for (i = 0; i < fwd->count; i++) { + char newstr[INANY_ADDRSTRLEN], rulestr[INANY_ADDRSTRLEN]; + struct fwd_rule_state *rule = &fwd->rules[i]; + + if (proto != rule->rule.proto) + /* Non-conflicting protocols */ + continue; + + if (!inany_matches(addr, fwd_rule_addr(&rule->rule))) + /* Non-conflicting addresses */ + continue; + + if (last < rule->rule.first || rule->rule.last < first) + /* Port ranges don't overlap */ + continue; + + die("Forwarding configuration conflict: %s/%u-%u versus %s/%u-%u", + inany_ntop(addr, newstr, sizeof(newstr)), first, last, + inany_ntop(fwd_rule_addr(&rule->rule), + rulestr, sizeof(rulestr)), + rule->rule.first, rule->rule.last); + } + + new = &fwd->rules[fwd->count++]; + new->rule.proto = proto; + new->rule.flags = flags; + + if (addr) { + new->rule.addr = *addr; + } else { + new->rule.addr = inany_any6; + new->rule.flags |= FWD_DUAL_STACK_ANY; + } + + memset(new->rule.ifname, 0, sizeof(new->rule.ifname)); + if (ifname) { + int ret; + + ret = snprintf(new->rule.ifname, sizeof(new->rule.ifname), + "%s", ifname); + if (ret <= 0 || (size_t)ret >= sizeof(new->rule.ifname)) + die("Invalid interface name: %s", ifname); + } + + assert(first <= last); + new->rule.first = first; + new->rule.last = last; + + new->rule.to = to; + + new->socks = &fwd->socks[fwd->sock_count]; + fwd->sock_count += num; + + for (port = new->rule.first; port <= new->rule.last; port++) + new->socks[port - new->rule.first] = -1; +} + /** * fwd_rule_seread() - Read erialised rule from an fd * @fd: fd to serialise to diff --git a/fwd_rule.h b/fwd_rule.h index 500b955..411a396 100644 --- a/fwd_rule.h +++ b/fwd_rule.h @@ -42,7 +42,60 @@ struct fwd_rule { uint8_t flags; }; +/** + * struct fwd_rule_state - Forwarding rule and associated state + * @rule: Rule specification + * @socks: Array of listening sockets for this entry + */ +struct fwd_rule_state { + struct fwd_rule rule; + int *socks; +}; + +#define 
FWD_RULE_BITS 8 +#define MAX_FWD_RULES MAX_FROM_BITS(FWD_RULE_BITS) +#define FWD_NO_HINT (-1) + +/** + * struct fwd_listen_ref - information about a single listening socket + * @port: Bound port number of the socket + * @pif: pif in which the socket is listening + * @rule: Index of forwarding rule + */ +struct fwd_listen_ref { + in_port_t port; + uint8_t pif; + unsigned rule :FWD_RULE_BITS; +}; + +/* Maximum number of listening sockets (per pif) + * + * Rationale: This lets us listen on every port for two addresses and two + * protocols (which we need for -T auto -U auto without SO_BINDTODEVICE), plus a + * comfortable number of extras. + */ +#define MAX_LISTEN_SOCKS (NUM_PORTS * 5) + +/** + * struct fwd_table - Table of forwarding rules (per initiating pif) + * @count: Number of forwarding rules + * @rules: Array of forwarding rules + * @sock_count: Number of entries used in @socks + * @socks: Listening sockets for forwarding + */ +struct fwd_table { + unsigned count; + struct fwd_rule_state rules[MAX_FWD_RULES]; + unsigned sock_count; + int socks[MAX_LISTEN_SOCKS]; +}; + +#define PORT_BITMAP_SIZE DIV_ROUND_UP(NUM_PORTS, 8) + const union inany_addr *fwd_rule_addr(const struct fwd_rule *rule); +void fwd_rule_add(struct fwd_table *fwd, uint8_t proto, uint8_t flags, + const union inany_addr *addr, const char *ifname, + in_port_t first, in_port_t last, in_port_t to); #define FWD_RULE_STRLEN \ (IPPROTO_STRLEN - 1 \ diff --git a/lineread.c b/lineread.c index b9ceae1..ae495a4 100644 --- a/lineread.c +++ b/lineread.c @@ -20,7 +20,6 @@ #include #include "lineread.h" -#include "util.h" /** * lineread_init() - Prepare for line by line file reading without allocation diff --git a/log.h b/log.h index 6ceb686..e2657ef 100644 --- a/log.h +++ b/log.h @@ -7,10 +7,42 @@ #define LOG_H #include +#include #include #include #include +#ifdef PESTO +extern int verbosity; + +#define die(...) 
\ + do { \ + FPRINTF(stderr, __VA_ARGS__); \ + FPRINTF(stderr, "\n"); \ + exit(EXIT_FAILURE); \ + } while (0) + +#define warn(...) \ + do { \ + FPRINTF(stderr, __VA_ARGS__); \ + FPRINTF(stderr, "\n"); \ + } while (0) + +#define warn_perror(...) \ + do { \ + FPRINTF(stderr, __VA_ARGS__); \ + FPRINTF(stderr, ": %i\n", errno); \ + } while (0) + +#define debug(...) \ + do { \ + if (verbosity > 1) { \ + FPRINTF(stderr, __VA_ARGS__); \ + FPRINTF(stderr, "\n"); \ + } \ + } while (0) +#else + /* This would make more sense in util.h, but because we use it in die(), that * would cause awkward circular reference problems. */ @@ -65,4 +97,5 @@ void __openlog(const char *ident, int option, int facility); void logfile_init(const char *name, const char *path, size_t size); void __setlogmask(int mask); +#endif /* !PESTO */ #endif /* LOG_H */ diff --git a/pesto.c b/pesto.c index f021cdb..42fa7f2 100644 --- a/pesto.c +++ b/pesto.c @@ -34,25 +34,12 @@ #include "common.h" #include "seccomp_pesto.h" #include "serialise.h" +#include "ports.h" #include "fwd_rule.h" #include "pesto.h" +#include "log.h" -static int verbosity = 1; - -#define die(...) 
\ - do { \ - if (verbosity > 1) { \ - FPRINTF(stderr, __VA_ARGS__); \ - FPRINTF(stderr, "\n"); \ - } \ - } while (0) +int verbosity = 1; /** * xmalloc() - Allocate memory, with fatal error on failure @@ -234,7 +221,7 @@ static void show_state(const struct conf_state *state) * * Return: 0 on success, won't return on failure * - * #syscalls:pesto connect write close exit_group fstat brk + * #syscalls:pesto connect write close exit_group fstat brk getrandom * #syscalls:pesto socket s390x:socketcall i686:socketcall * #syscalls:pesto recvfrom recvmsg arm:recv ppc64le:recv * #syscalls:pesto sendto sendmsg arm:send ppc64le:send diff --git a/ports.c b/ports.c index 5480176..a7167ac 100644 --- a/ports.c +++ b/ports.c @@ -26,19 +26,93 @@ #include #include #include +#include #include #include #include #include #include +#include "lineread.h" #include "common.h" -#include "util.h" -#include "ip.h" -#include "passt.h" -#include "common.h" -#include "pesto.h" #include "ports.h" +#include "fwd_rule.h" +#include "log.h" +#include "pesto.h" + +/* Ephemeral port range: values from RFC 6335 */ +static in_port_t fwd_ephemeral_min = (1 << 15) + (1 << 14); +static in_port_t fwd_ephemeral_max = NUM_PORTS - 1; + +#define PORT_RANGE_SYSCTL "/proc/sys/net/ipv4/ip_local_port_range" + +/** fwd_probe_ephemeral() - Determine what ports this host considers ephemeral + * + * Work out what ports the host thinks are emphemeral and record it for later + * use by fwd_port_is_ephemeral(). If we're unable to probe, assume the range + * recommended by RFC 6335. 
+ */ +void fwd_probe_ephemeral(void) +{ + char *line, *tab, *end; + struct lineread lr; + long min, max; + ssize_t len; + int fd; + + fd = open(PORT_RANGE_SYSCTL, O_RDONLY | O_CLOEXEC); + if (fd < 0) { + warn_perror("Unable to open %s", PORT_RANGE_SYSCTL); + return; + } + + lineread_init(&lr, fd); + len = lineread_get(&lr, &line); + close(fd); + + if (len < 0) + goto parse_err; + + tab = strchr(line, '\t'); + if (!tab) + goto parse_err; + *tab = '\0'; + + errno = 0; + min = strtol(line, &end, 10); + if (*end || errno) + goto parse_err; + + errno = 0; + max = strtol(tab + 1, &end, 10); + if (*end || errno) + goto parse_err; + + if (min < 0 || min >= (long)NUM_PORTS || + max < 0 || max >= (long)NUM_PORTS) + goto parse_err; + + fwd_ephemeral_min = min; + fwd_ephemeral_max = max; + + return; + +parse_err: + warn("Unable to parse %s", PORT_RANGE_SYSCTL); +} + +/** + * fwd_port_is_ephemeral() - Is port number ephemeral? + * @port: Port number + * + * Return: true if @port is ephemeral, that is may be allocated by the kernel as + * a local port for outgoing connections or datagrams, but should not be + * used for binding services to. 
+ */ +bool fwd_port_is_ephemeral(in_port_t port) +{ + return (port >= fwd_ephemeral_min) && (port <= fwd_ephemeral_max); +} /** * next_chunk() - Return the next piece of a string delimited by a character @@ -103,23 +177,27 @@ static int parse_port_range(const char *s, char **endptr, /** * conf_ports_range_except() - Set up forwarding for a range of ports minus a * bitmap of exclusions - * @c: Execution context - * @optname: Short option name, t, T, u, or U - * @optarg: Option argument (port specification) - * @fwd: Forwarding table to be updated - * @addr: Listening address - * @ifname: Listening interface - * @first: First port to forward - * @last: Last port to forward - * @exclude: Bitmap of ports to exclude (may be NULL) - * @to: Port to translate @first to when forwarding - * @flags: Flags for forwarding entries + * @optname: Short option name, t, T, u, or U + * @optarg: Option argument (port specification) + * @fwd: Forwarding table to be updated + * @addr: Listening address + * @ifname: Listening interface + * @first: First port to forward + * @last: Last port to forward + * @exclude: Bitmap of ports to exclude (may be NULL) + * @to: Port to translate @first to when forwarding + * @flags: Flags for forwarding entries + * @can_bindtodevice SO_BINDTODEVICE is available + * @v4_enabled IPv4 is enabled + * @v6_enabled IPv6 is enabled */ -void conf_ports_range_except(const struct ctx *c, char optname, - const char *optarg, struct fwd_table *fwd, - const union inany_addr *addr, - const char *ifname, uint16_t first, uint16_t last, - const uint8_t *exclude, uint16_t to, uint8_t flags) +void conf_ports_range_except(char optname, const char *optarg, + struct fwd_table *fwd, + const union inany_addr *addr, const char *ifname, + uint16_t first, uint16_t last, + const uint8_t *exclude, uint16_t to, uint8_t flags, + bool can_bindtodevice, + bool v4_enabled, bool v6_enabled) { unsigned delta = to - first; unsigned base, i; @@ -138,10 +216,10 @@ void 
conf_ports_range_except(const struct ctx *c, char optname, assert(0); if (addr) { - if (!c->ifi4 && inany_v4(addr)) { + if (!v4_enabled && inany_v4(addr)) { die("IPv4 is disabled, can't use -%c %s", optname, optarg); - } else if (!c->ifi6 && !inany_v4(addr)) { + } else if (!v6_enabled && !inany_v4(addr)) { die("IPv6 is disabled, can't use -%c %s", optname, optarg); } @@ -156,7 +234,7 @@ void conf_ports_range_except(const struct ctx *c, char optname, break; } - if ((optname == 'T' || optname == 'U') && c->no_bindtodevice) { + if ((optname == 'T' || optname == 'U') && !can_bindtodevice) { /* FIXME: Once the fwd bitmaps are removed, move this * workaround to the caller */ @@ -165,12 +243,12 @@ void conf_ports_range_except(const struct ctx *c, char optname, "SO_BINDTODEVICE unavailable, forwarding only 127.0.0.1 and ::1 for '-%c %s'", optname, optarg); - if (c->ifi4) { + if (v4_enabled) { fwd_rule_add(fwd, proto, flags, &inany_loopback4, NULL, base, i - 1, base + delta); } - if (c->ifi6) { + if (v6_enabled) { fwd_rule_add(fwd, proto, flags, &inany_loopback6, NULL, base, i - 1, base + delta); @@ -185,14 +263,17 @@ void conf_ports_range_except(const struct ctx *c, char optname, /** * conf_ports() - Parse port configuration options, initialise UDP/TCP sockets - * @c: Execution context - * @optname: Short option name, t, T, u, or U - * @optarg: Option argument (port specification) - * @fwd: Forwarding table to be updated - * @mode: Overall port forwarding mode (updated) + * @optname: Short option name, t, T, u, or U + * @optarg: Option argument (port specification) + * @fwd: Forwarding table to be updated + * @mode: Overall port forwarding mode (updated) + * @can_bindtodevice Whether SO_BINDTODEVICE is available + * @v4_enabled IPv4 is enabled + * @v6_enabled IPv6 is enabled */ -void conf_ports(const struct ctx *c, char optname, const char *optarg, - struct fwd_table *fwd, enum fwd_mode *mode) +void conf_ports(char optname, const char *optarg, struct fwd_table *fwd, + enum 
fwd_mode *mode, bool can_bindtodevice, + bool v4_enabled, bool v6_enabled) { union inany_addr addr_buf = inany_any6, *addr = &addr_buf; char buf[BUFSIZ], *spec, *ifname = NULL, *p; @@ -208,19 +289,11 @@ void conf_ports(const struct ctx *c, char optname, const char *optarg, return; } - if ((optname == 't' || optname == 'T') && c->no_tcp) - die("TCP port forwarding requested but TCP is disabled"); - if ((optname == 'u' || optname == 'U') && c->no_udp) - die("UDP port forwarding requested but UDP is disabled"); - if (!strcmp(optarg, "auto")) { if (*mode) goto mode_conflict; - if (c->mode != MODE_PASTA) - die("'auto' port forwarding is only allowed for pasta"); - - if ((optname == 'T' || optname == 'U') && c->no_bindtodevice) { + if ((optname == 'T' || optname == 'U') && !can_bindtodevice) { warn( "'-%c auto' enabled without unprivileged SO_BINDTODEVICE", optname); warn( @@ -234,9 +307,6 @@ void conf_ports(const struct ctx *c, char optname, const char *optarg, if (*mode) goto mode_conflict; - if (c->mode == MODE_PASTA) - die("'all' port forwarding is only allowed for passt"); - *mode = FWD_MODE_ALL; /* Exclude ephemeral ports */ @@ -244,10 +314,10 @@ void conf_ports(const struct ctx *c, char optname, const char *optarg, if (fwd_port_is_ephemeral(i)) bitmap_set(exclude, i); - conf_ports_range_except(c, optname, optarg, fwd, - NULL, NULL, - 1, NUM_PORTS - 1, exclude, - 1, FWD_WEAK); + conf_ports_range_except(optname, optarg, fwd, NULL, NULL, + 1, NUM_PORTS - 1, exclude, 1, FWD_WEAK, + can_bindtodevice, + v4_enabled, v6_enabled); return; } @@ -323,7 +393,7 @@ void conf_ports(const struct ctx *c, char optname, const char *optarg, } } while ((p = next_chunk(p, ','))); - if (ifname && c->no_bindtodevice) { + if (ifname && !can_bindtodevice) { die( "Device binding for '-%c %s' unsupported (requires kernel 5.7+)", optname, optarg); @@ -338,10 +408,10 @@ void conf_ports(const struct ctx *c, char optname, const char *optarg, if (fwd_port_is_ephemeral(i)) bitmap_set(exclude, i); - 
conf_ports_range_except(c, optname, optarg, fwd, - addr, ifname, - 1, NUM_PORTS - 1, exclude, - 1, FWD_WEAK); + conf_ports_range_except(optname, optarg, fwd, addr, ifname, + 1, NUM_PORTS - 1, exclude, 1, FWD_WEAK, + can_bindtodevice, + v4_enabled, v6_enabled); return; } @@ -370,11 +440,11 @@ void conf_ports(const struct ctx *c, char optname, const char *optarg, if ((*p != '\0') && (*p != ',')) /* Garbage after the ranges */ goto bad; - conf_ports_range_except(c, optname, optarg, fwd, - addr, ifname, + conf_ports_range_except(optname, optarg, fwd, addr, ifname, orig_range.first, orig_range.last, - exclude, - mapped_range.first, 0); + exclude, mapped_range.first, 0, + can_bindtodevice, + v4_enabled, v6_enabled); } while ((p = next_chunk(p, ','))); return; diff --git a/ports.h b/ports.h index 3ef50e6..ffe440a 100644 --- a/ports.h +++ b/ports.h @@ -6,6 +6,13 @@ #ifndef PORTS_H #define PORTS_H +#include + +#include "inany.h" + +/* Number of ports for both TCP and UDP */ +#define NUM_PORTS (1U << 16) + /** * enum fwd_mode - Overall forwarding mode for a direction and protocol * @FWD_MODE_UNSET Initial value, not parsed/configured yet @@ -22,14 +29,20 @@ enum fwd_mode { FWD_MODE_ALL, }; -void conf_ports_range_except(const struct ctx *c, char optname, - const char *optarg, struct fwd_table *fwd, - const union inany_addr *addr, - const char *ifname, uint16_t first, uint16_t last, - const uint8_t *exclude, uint16_t to, - uint8_t flags); -void conf_ports(const struct ctx *c, char optname, const char *optarg, - struct fwd_table *fwd, enum fwd_mode *mode); +struct fwd_table; + +void fwd_probe_ephemeral(void); +bool fwd_port_is_ephemeral(in_port_t port); +void conf_ports_range_except(char optname, const char *optarg, + struct fwd_table *fwd, + const union inany_addr *addr, const char *ifname, + uint16_t first, uint16_t last, + const uint8_t *exclude, uint16_t to, uint8_t flags, + bool can_bindtodevice, + bool v4_enabled, bool v6_enabled); +void conf_ports(char optname, const 
char *optarg, struct fwd_table *fwd, + enum fwd_mode *mode, bool can_bindtodevice, + bool v4_enabled, bool v6_enabled); #endif /* PORTS_H */ diff --git a/util.c b/util.c index c64a1a6..ff5094e 100644 --- a/util.c +++ b/util.c @@ -367,90 +367,6 @@ long timespec_diff_ms(const struct timespec *a, const struct timespec *b) return timespec_diff_us(a, b) / 1000; } -/** - * bitmap_set() - Set single bit in bitmap - * @map: Pointer to bitmap - * @bit: Bit number to set - */ -void bitmap_set(uint8_t *map, unsigned bit) -{ - unsigned long *word = (unsigned long *)map + BITMAP_WORD(bit); - - *word |= BITMAP_BIT(bit); -} - -/** - * bitmap_clear() - Clear single bit in bitmap - * @map: Pointer to bitmap - * @bit: Bit number to clear - */ -/* cppcheck-suppress unusedFunction */ -void bitmap_clear(uint8_t *map, unsigned bit) -{ - unsigned long *word = (unsigned long *)map + BITMAP_WORD(bit); - - *word &= ~BITMAP_BIT(bit); -} - -/** - * bitmap_isset() - Check for set bit in bitmap - * @map: Pointer to bitmap - * @bit: Bit number to check - * - * Return: true if given bit is set, false if it's not - */ -bool bitmap_isset(const uint8_t *map, unsigned bit) -{ - const unsigned long *word - = (const unsigned long *)map + BITMAP_WORD(bit); - - return !!(*word & BITMAP_BIT(bit)); -} - -/** - * bitmap_or() - Logical disjunction (OR) of two bitmaps - * @dst: Pointer to result bitmap - * @size: Size of bitmaps, in bytes - * @a: First operand - * @b: Second operand - */ -/* cppcheck-suppress unusedFunction */ -void bitmap_or(uint8_t *dst, size_t size, const uint8_t *a, const uint8_t *b) -{ - unsigned long *dw = (unsigned long *)dst; - unsigned long *aw = (unsigned long *)a; - unsigned long *bw = (unsigned long *)b; - size_t i; - - for (i = 0; i < size / sizeof(long); i++, dw++, aw++, bw++) - *dw = *aw | *bw; - - for (i = size / sizeof(long) * sizeof(long); i < size; i++) - dst[i] = a[i] | b[i]; -} - -/** - * bitmap_and_not() - Logical conjunction with complement (AND NOT) of bitmap - * @dst: 
Pointer to result bitmap - * @size: Size of bitmaps, in bytes - * @a: First operand - * @b: Second operand - */ -void bitmap_and_not(uint8_t *dst, size_t size, - const uint8_t *a, const uint8_t *b) -{ - unsigned long *dw = (unsigned long *)dst; - unsigned long *aw = (unsigned long *)a; - unsigned long *bw = (unsigned long *)b; - size_t i; - - for (i = 0; i < size / sizeof(long); i++, dw++, aw++, bw++) - *dw = *aw & ~*bw; - - for (i = size / sizeof(long) * sizeof(long); i < size; i++) - dst[i] = a[i] & ~b[i]; -} - /** * ns_enter() - Enter configured user (unless already joined) and network ns * @c: Execution context diff --git a/util.h b/util.h index 8495ed9..8e81a80 100644 --- a/util.h +++ b/util.h @@ -28,16 +28,6 @@ #define IP_MAX_MTU USHRT_MAX #endif -#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d)) -#define DIV_ROUND_CLOSEST(n, d) (((n) + (d) / 2) / (d)) -#define ROUND_DOWN(x, y) ((x) & ~((y) - 1)) -#define ROUND_UP(x, y) (((x) + (y) - 1) & ~((y) - 1)) - -#define MAX_FROM_BITS(n) (((1U << (n)) - 1)) - -#define BITMAP_BIT(n) (BIT((n) % (sizeof(long) * 8))) -#define BITMAP_WORD(n) (n / (sizeof(long) * 8)) - #define SWAP(a, b) \ do { \ __typeof__(a) __x = (a); (a) = (b); (b) = __x; \ @@ -82,8 +72,6 @@ void abort_with_msg(const char *fmt, ...) 
#define V6 1 #define IP_VERSIONS 2 -#define ARRAY_SIZE(a) ((int)(sizeof(a) / sizeof((a)[0]))) - #define foreach(item, array) \ for ((item) = (array); (item) - (array) < ARRAY_SIZE(array); (item)++) @@ -164,12 +152,6 @@ int sock_unix(char *sock_path); void sock_probe_features(struct ctx *c); long timespec_diff_ms(const struct timespec *a, const struct timespec *b); int64_t timespec_diff_us(const struct timespec *a, const struct timespec *b); -void bitmap_set(uint8_t *map, unsigned bit); -void bitmap_clear(uint8_t *map, unsigned bit); -bool bitmap_isset(const uint8_t *map, unsigned bit); -void bitmap_or(uint8_t *dst, size_t size, const uint8_t *a, const uint8_t *b); -void bitmap_and_not(uint8_t *dst, size_t size, - const uint8_t *a, const uint8_t *b); char *line_read(char *buf, size_t len, int fd); void ns_enter(const struct ctx *c); bool ns_is_init(void); -- 2.43.0