On Sun, Feb 09, 2025 at 11:20:02PM +0100, Stefano Brivio wrote: > In vhost-user mode, by default, create a second UNIX domain socket > accepting connections from passt-repair, with the usual listener > socket. > > When we need to set or clear TCP_REPAIR on sockets, we'll send them > via SCM_RIGHTS to passt-repair, who sets the socket option values we > ask for. > > To that end, introduce batched functions to request TCP_REPAIR > settings on sockets, so that we don't have to send a single message > for each socket, on migration. When needed, repair_flush() will > send the message and check for the reply. > > Signed-off-by: Stefano Brivio > --- > Makefile | 12 +-- > conf.c | 44 ++++++++++- > epoll_type.h | 4 + > migrate.c | 5 +- > passt.1 | 11 +++ > passt.c | 9 +++ > passt.h | 7 ++ > repair.c | 212 +++++++++++++++++++++++++++++++++++++++++++++++++++ > repair.h | 16 ++++ > tap.c | 65 +--------------- > util.c | 62 +++++++++++++++ > util.h | 1 + > 12 files changed, 375 insertions(+), 73 deletions(-) > create mode 100644 repair.c > create mode 100644 repair.h > > diff --git a/Makefile b/Makefile > index be89b07..d4e1096 100644 > --- a/Makefile > +++ b/Makefile > @@ -38,9 +38,9 @@ FLAGS += -DDUAL_STACK_SOCKETS=$(DUAL_STACK_SOCKETS) > > PASST_SRCS = arch.c arp.c checksum.c conf.c dhcp.c dhcpv6.c flow.c fwd.c \ > icmp.c igmp.c inany.c iov.c ip.c isolation.c lineread.c log.c mld.c \ > - ndp.c netlink.c migrate.c packet.c passt.c pasta.c pcap.c pif.c tap.c \ > - tcp.c tcp_buf.c tcp_splice.c tcp_vu.c udp.c udp_flow.c udp_vu.c util.c \ > - vhost_user.c virtio.c vu_common.c > + ndp.c netlink.c migrate.c packet.c passt.c pasta.c pcap.c pif.c \ > + repair.c tap.c tcp.c tcp_buf.c tcp_splice.c tcp_vu.c udp.c udp_flow.c \ > + udp_vu.c util.c vhost_user.c virtio.c vu_common.c > QRAP_SRCS = qrap.c > PASST_REPAIR_SRCS = passt-repair.c > SRCS = $(PASST_SRCS) $(QRAP_SRCS) $(PASST_REPAIR_SRCS) > @@ -50,9 +50,9 @@ MANPAGES = passt.1 pasta.1 qrap.1 passt-repair.1 > PASST_HEADERS = arch.h 
arp.h checksum.h conf.h dhcp.h dhcpv6.h flow.h fwd.h \ > flow_table.h icmp.h icmp_flow.h inany.h iov.h ip.h isolation.h \ > lineread.h log.h migrate.h ndp.h netlink.h packet.h passt.h pasta.h \ > - pcap.h pif.h siphash.h tap.h tcp.h tcp_buf.h tcp_conn.h tcp_internal.h \ > - tcp_splice.h tcp_vu.h udp.h udp_flow.h udp_internal.h udp_vu.h util.h \ > - vhost_user.h virtio.h vu_common.h > + pcap.h pif.h repair.h siphash.h tap.h tcp.h tcp_buf.h tcp_conn.h \ > + tcp_internal.h tcp_splice.h tcp_vu.h udp.h udp_flow.h udp_internal.h \ > + udp_vu.h util.h vhost_user.h virtio.h vu_common.h > HEADERS = $(PASST_HEADERS) seccomp.h > > C := \#include \nint main(){int a=getrandom(0, 0, 0);} > diff --git a/conf.c b/conf.c > index 142dc94..7a5ff8b 100644 > --- a/conf.c > +++ b/conf.c > @@ -820,6 +820,9 @@ static void usage(const char *name, FILE *f, int status) > " UNIX domain socket is provided by -s option\n" > " --print-capabilities print back-end capabilities in JSON format,\n" > " only meaningful for vhost-user mode\n"); > + FPRINTF(f, > + " --repair-path PATH path for passt-repair(1)\n" > + " default: append '.repair' to UNIX domain path\n"); > } > > FPRINTF(f, > @@ -1243,8 +1246,25 @@ static void conf_nat(const char *arg, struct in_addr *addr4, > */ > static void conf_open_files(struct ctx *c) > { > - if (c->mode != MODE_PASTA && c->fd_tap == -1) > - c->fd_tap_listen = tap_sock_unix_open(c->sock_path); > + if (c->mode != MODE_PASTA && c->fd_tap == -1) { > + c->fd_tap_listen = sock_unix(c->sock_path); > + > + if (c->mode == MODE_VU && strcmp(c->repair_path, "none")) { > + if (!*c->repair_path && > + snprintf_check(c->repair_path, > + sizeof(c->repair_path), "%s.repair", > + c->sock_path)) { > + warn("passt-repair path %s not usable", > + c->repair_path); > + c->fd_repair_listen = -1; > + } else { > + c->fd_repair_listen = sock_unix(c->repair_path); > + } > + } else { > + c->fd_repair_listen = -1; > + } > + c->fd_repair = -1; > + } > > if (*c->pidfile) { > c->pidfile_fd = 
output_file_open(c->pidfile, O_WRONLY); > @@ -1357,9 +1377,12 @@ void conf(struct ctx *c, int argc, char **argv) > {"host-lo-to-ns-lo", no_argument, NULL, 23 }, > {"dns-host", required_argument, NULL, 24 }, > {"vhost-user", no_argument, NULL, 25 }, > + > /* vhost-user backend program convention */ > {"print-capabilities", no_argument, NULL, 26 }, > {"socket-path", required_argument, NULL, 's' }, > + > + {"repair-path", required_argument, NULL, 27 }, > { 0 }, > }; > const char *logname = (c->mode == MODE_PASTA) ? "pasta" : "passt"; > @@ -1751,6 +1774,9 @@ void conf(struct ctx *c, int argc, char **argv) > case 'D': > /* Handle these later, once addresses are configured */ > break; > + case 27: > + /* Handle this once we checked --vhost-user */ > + break; > case 'h': > usage(argv[0], stdout, EXIT_SUCCESS); > break; > @@ -1827,8 +1853,8 @@ void conf(struct ctx *c, int argc, char **argv) > if (c->ifi4 && IN4_IS_ADDR_UNSPECIFIED(&c->ip4.guest_gw)) > c->no_dhcp = 1; > > - /* Inbound port options & DNS can be parsed now (after IPv4/IPv6 > - * settings) > + /* Inbound port options, DNS, and --repair-path can be parsed now, after > + * IPv4/IPv6 settings and --vhost-user. 
> */ > fwd_probe_ephemeral(); > udp_portmap_clear(); > @@ -1874,6 +1900,16 @@ void conf(struct ctx *c, int argc, char **argv) > } > > die("Cannot use DNS address %s", optarg); > + } else if (name == 27) { > + if (c->mode != MODE_VU && strcmp(optarg, "none")) > + die("--repair-path is for vhost-user mode only"); > + > + if (snprintf_check(c->repair_path, > + sizeof(c->repair_path), "%s", > + optarg)) > + die("Invalid passt-repair path: %s", optarg); > + > + break; > } > } while (name != -1); > > diff --git a/epoll_type.h b/epoll_type.h > index f3ef415..7f2a121 100644 > --- a/epoll_type.h > +++ b/epoll_type.h > @@ -40,6 +40,10 @@ enum epoll_type { > EPOLL_TYPE_VHOST_CMD, > /* vhost-user kick event socket */ > EPOLL_TYPE_VHOST_KICK, > + /* TCP_REPAIR helper listening socket */ > + EPOLL_TYPE_REPAIR_LISTEN, > + /* TCP_REPAIR helper socket */ > + EPOLL_TYPE_REPAIR, > > EPOLL_NUM_TYPES, > }; > diff --git a/migrate.c b/migrate.c > index 72a6d40..1c59016 100644 > --- a/migrate.c > +++ b/migrate.c > @@ -23,6 +23,7 @@ > #include "flow_table.h" > > #include "migrate.h" > +#include "repair.h" > > /* Magic identifier for migration data */ > #define MIGRATE_MAGIC 0xB1BB1D1B0BB1D1B0 > @@ -232,7 +233,7 @@ void migrate_init(struct ctx *c) > } > > /** > - * migrate_close() - Close migration channel > + * migrate_close() - Close migration channel and connection to passt-repair > * @c: Execution context > */ > void migrate_close(struct ctx *c) > @@ -243,6 +244,8 @@ void migrate_close(struct ctx *c) > c->device_state_fd = -1; > c->device_state_result = -1; > } > + > + repair_close(c); I don't think we want this. At the moment, rollback / failed migrations aren't really handled properly anyway. But this pretty much explicitly prevents a second attempt at a failed migration. I'll send a fixup. > } > > /** > diff --git a/passt.1 b/passt.1 > index 29cc3ed..c81d539 100644 > --- a/passt.1 > +++ b/passt.1 > @@ -418,6 +418,17 @@ Enable vhost-user. 
The vhost-user command socket is provided by \fB--socket\fR. > .BR \-\-print-capabilities > Print back-end capabilities in JSON format, only meaningful for vhost-user mode. > > +.TP > +.BR \-\-repair-path " " \fIpath > +Path for UNIX domain socket used by the \fBpasst-repair\fR(1) helper to connect > +to \fBpasst\fR in order to set or clear the TCP_REPAIR option on sockets, during > +migration. \fB--repair-path none\fR disables this interface (if you need to > +specify a socket path called "none" you can prefix the path by \fI./\fR). > + > +Default, for \-\-vhost-user mode only, is to append \fI.repair\fR to the path > +chosen for the hypervisor UNIX domain socket. No socket is created if not in > +\-\-vhost-user mode. > + > .TP > .BR \-F ", " \-\-fd " " \fIFD > Pass a pre-opened, connected socket to \fBpasst\fR. Usually the socket is opened > diff --git a/passt.c b/passt.c > index 935a69f..6f9fb4d 100644 > --- a/passt.c > +++ b/passt.c > @@ -52,6 +52,7 @@ > #include "ndp.h" > #include "vu_common.h" > #include "migrate.h" > +#include "repair.h" > > #define EPOLL_EVENTS 8 > > @@ -76,6 +77,8 @@ char *epoll_type_str[] = { > [EPOLL_TYPE_TAP_LISTEN] = "listening qemu socket", > [EPOLL_TYPE_VHOST_CMD] = "vhost-user command socket", > [EPOLL_TYPE_VHOST_KICK] = "vhost-user kick socket", > + [EPOLL_TYPE_REPAIR_LISTEN] = "TCP_REPAIR helper listening socket", > + [EPOLL_TYPE_REPAIR] = "TCP_REPAIR helper socket", > }; > static_assert(ARRAY_SIZE(epoll_type_str) == EPOLL_NUM_TYPES, > "epoll_type_str[] doesn't match enum epoll_type"); > @@ -358,6 +361,12 @@ loop: > case EPOLL_TYPE_VHOST_KICK: > vu_kick_cb(c.vdev, ref, &now); > break; > + case EPOLL_TYPE_REPAIR_LISTEN: > + repair_listen_handler(&c, eventmask); > + break; > + case EPOLL_TYPE_REPAIR: > + repair_handler(&c, eventmask); > + break; > default: > /* Can't happen */ > ASSERT(0); > diff --git a/passt.h b/passt.h > index e73a5ac..c392be0 100644 > --- a/passt.h > +++ b/passt.h > @@ -20,6 +20,7 @@ union epoll_ref; > #include 
"siphash.h" > #include "ip.h" > #include "inany.h" > +#include "migrate.h" > #include "flow.h" > #include "icmp.h" > #include "fwd.h" > @@ -193,6 +194,7 @@ struct ip6_ctx { > * @foreground: Run in foreground, don't log to stderr by default > * @nofile: Maximum number of open files (ulimit -n) > * @sock_path: Path for UNIX domain socket > + * @repair_path: TCP_REPAIR helper path, can be "none", empty for default > * @pcap: Path for packet capture file > * @pidfile: Path to PID file, empty string if not configured > * @pidfile_fd: File descriptor for PID file, -1 if none > @@ -203,6 +205,8 @@ struct ip6_ctx { > * @epollfd: File descriptor for epoll instance > * @fd_tap_listen: File descriptor for listening AF_UNIX socket, if any > * @fd_tap: AF_UNIX socket, tuntap device, or pre-opened socket > + * @fd_repair_listen: File descriptor for listening TCP_REPAIR socket, if any > + * @fd_repair: Connected AF_UNIX socket for TCP_REPAIR helper > * @our_tap_mac: Pasta/passt's MAC on the tap link > * @guest_mac: MAC address of guest or namespace, seen or configured > * @hash_secret: 128-bit secret for siphash functions > @@ -247,6 +251,7 @@ struct ctx { > int foreground; > int nofile; > char sock_path[UNIX_PATH_MAX]; > + char repair_path[UNIX_PATH_MAX]; > char pcap[PATH_MAX]; > > char pidfile[PATH_MAX]; > @@ -263,6 +268,8 @@ struct ctx { > int epollfd; > int fd_tap_listen; > int fd_tap; > + int fd_repair_listen; > + int fd_repair; > unsigned char our_tap_mac[ETH_ALEN]; > unsigned char guest_mac[ETH_ALEN]; > uint64_t hash_secret[2]; > diff --git a/repair.c b/repair.c > new file mode 100644 > index 0000000..784b994 > --- /dev/null > +++ b/repair.c > @@ -0,0 +1,212 @@ > +// SPDX-License-Identifier: GPL-2.0-or-later > + > +/* PASST - Plug A Simple Socket Transport > + * for qemu/UNIX domain socket mode > + * > + * PASTA - Pack A Subtle Tap Abstraction > + * for network namespace/tap device mode > + * > + * repair.c - Interface (server) for passt-repair, set/clear TCP_REPAIR > + * 
> + * Copyright (c) 2025 Red Hat GmbH > + * Author: Stefano Brivio > + */ > + > +#include > +#include > + > +#include "util.h" > +#include "ip.h" > +#include "passt.h" > +#include "inany.h" > +#include "flow.h" > +#include "flow_table.h" > + > +#include "repair.h" > + > +#define SCM_MAX_FD 253 /* From Linux kernel (include/net/scm.h), not in UAPI */ > + > +/* Pending file descriptors for next repair_flush() call, or command change */ > +static int repair_fds[SCM_MAX_FD]; > + > +/* Pending command: flush pending file descriptors if it changes */ > +static int repair_cmd; This should be typed as int8_t (see below for more details). > + > +/* Number of pending file descriptors set in @repair_fds */ > +static int repair_nfds; > + > +/** > + * repair_sock_init() - Start listening for connections on helper socket > + * @c: Execution context > + */ > +void repair_sock_init(const struct ctx *c) > +{ > + union epoll_ref ref = { .type = EPOLL_TYPE_REPAIR_LISTEN }; > + struct epoll_event ev = { 0 }; > + > + if (c->fd_repair_listen == -1) > + return; > + > + if (listen(c->fd_repair_listen, 0)) { > + err_perror("listen() on repair helper socket, won't migrate"); > + return; > + } > + > + ref.fd = c->fd_repair_listen; > + ev.events = EPOLLIN | EPOLLHUP | EPOLLET; > + ev.data.u64 = ref.u64; > + if (epoll_ctl(c->epollfd, EPOLL_CTL_ADD, c->fd_repair_listen, &ev)) > + err_perror("repair helper socket epoll_ctl(), won't migrate"); > +} > + > +/** > + * repair_listen_handler() - Handle events on TCP_REPAIR helper listening socket > + * @c: Execution context > + * @events: epoll events > + */ > +void repair_listen_handler(struct ctx *c, uint32_t events) > +{ > + union epoll_ref ref = { .type = EPOLL_TYPE_REPAIR }; > + struct epoll_event ev = { 0 }; > + struct ucred ucred; > + socklen_t len; > + > + if (events != EPOLLIN) { > + debug("Spurious event 0x%04x on TCP_REPAIR helper socket", > + events); > + return; > + } > + > + len = sizeof(ucred); > + > + /* Another client is already 
connected: accept and close right away. */ > + if (c->fd_repair != -1) { > + int discard = accept4(c->fd_repair_listen, NULL, NULL, > + SOCK_NONBLOCK); > + > + if (discard == -1) > + return; > + > + if (!getsockopt(discard, SOL_SOCKET, SO_PEERCRED, &ucred, &len)) > + info("Discarding TCP_REPAIR helper, PID %i", ucred.pid); > + > + close(discard); > + return; > + } > + > + if ((c->fd_repair = accept4(c->fd_repair_listen, NULL, NULL, 0)) < 0) { > + debug_perror("accept4() on TCP_REPAIR helper listening socket"); > + return; > + } > + > + if (!getsockopt(c->fd_repair, SOL_SOCKET, SO_PEERCRED, &ucred, &len)) > + info("Accepted TCP_REPAIR helper, PID %i", ucred.pid); > + > + ref.fd = c->fd_repair; > + ev.events = EPOLLHUP | EPOLLET; > + ev.data.u64 = ref.u64; > + if (epoll_ctl(c->epollfd, EPOLL_CTL_ADD, c->fd_repair, &ev)) { > + debug_perror("epoll_ctl() on TCP_REPAIR helper socket"); > + close(c->fd_repair); > + c->fd_repair = -1; > + } > +} > + > +/** > + * repair_close() - Close connection to TCP_REPAIR helper > + * @c: Execution context > + */ > +void repair_close(struct ctx *c) > +{ > + debug("Closing TCP_REPAIR helper socket"); > + > + epoll_ctl(c->epollfd, EPOLL_CTL_DEL, c->fd_repair, NULL); > + close(c->fd_repair); > + c->fd_repair = -1; > +} > + > +/** > + * repair_handler() - Handle EPOLLHUP and EPOLLERR on TCP_REPAIR helper socket > + * @c: Execution context > + * @events: epoll events > + */ > +void repair_handler(struct ctx *c, uint32_t events) > +{ > + (void)events; > + > + repair_close(c); > +} > + > +/** > + * repair_flush() - Flush current set of sockets to helper, with current command > + * @c: Execution context > + * > + * Return: 0 on success, negative error code on failure > + */ > +int repair_flush(struct ctx *c) > +{ > + struct iovec iov = { &((int8_t){ repair_cmd }), sizeof(int8_t) }; This will only be correct for little-endian machines. Better to correctly type the repair_cmd variable. 
> + char buf[CMSG_SPACE(sizeof(int) * SCM_MAX_FD)] > + __attribute__ ((aligned(__alignof__(struct cmsghdr)))); > + struct cmsghdr *cmsg; > + struct msghdr msg; > + > + if (!repair_nfds) > + return 0; > + > + msg = (struct msghdr){ NULL, 0, &iov, 1, > + buf, CMSG_SPACE(sizeof(int) * repair_nfds), 0 }; > + cmsg = CMSG_FIRSTHDR(&msg); > + > + cmsg->cmsg_level = SOL_SOCKET; > + cmsg->cmsg_type = SCM_RIGHTS; > + cmsg->cmsg_len = CMSG_LEN(sizeof(int) * repair_nfds); > + memcpy(CMSG_DATA(cmsg), repair_fds, sizeof(int) * repair_nfds); > + > + repair_nfds = 0; > + > + if (sendmsg(c->fd_repair, &msg, 0) < 0) { > + int ret = -errno; > + err_perror("Failed to send sockets to TCP_REPAIR helper"); > + repair_close(c); > + return ret; > + } > + > + if (recv(c->fd_repair, &((int8_t){ 0 }), 1, 0) < 0) { I guess it works, but passing the address of an implicitly constructed variable (a compound literal) to recv() makes me nervous. Besides, we could check for errors a bit better here; I'll try to send another fixup. > + int ret = -errno; > + err_perror("Failed to receive reply from TCP_REPAIR helper"); > + repair_close(c); > + return ret; > + } > + > + return 0; > +} > + > +/** > + * repair_set() - Add socket to TCP_REPAIR set with given command > + * @c: Execution context > + * @s: Socket to add > + * @cmd: TCP_REPAIR_ON, TCP_REPAIR_OFF, or TCP_REPAIR_OFF_NO_WP > + * > + * Return: 0 on success, negative error code on failure > + */ > +/* cppcheck-suppress unusedFunction */ > +int repair_set(struct ctx *c, int s, int cmd) > +{ > + int rc; > + > + if (repair_nfds && repair_cmd != cmd) { > + if ((rc = repair_flush(c))) > + return rc; > + } > + > + repair_cmd = cmd; > + repair_fds[repair_nfds++] = s; > + > + if (repair_nfds >= SCM_MAX_FD) { > + if ((rc = repair_flush(c))) > + return rc; > + } > + > + return 0; > +} > diff --git a/repair.h b/repair.h > new file mode 100644 > index 0000000..de279d6 > --- /dev/null > +++ b/repair.h > @@ -0,0 +1,16 @@ > +/* SPDX-License-Identifier: GPL-2.0-or-later > + * Copyright (c) 
2025 Red Hat GmbH > + * Author: Stefano Brivio > + */ > + > +#ifndef REPAIR_H > +#define REPAIR_H > + > +void repair_sock_init(const struct ctx *c); > +void repair_listen_handler(struct ctx *c, uint32_t events); > +void repair_handler(struct ctx *c, uint32_t events); > +void repair_close(struct ctx *c); > +int repair_flush(struct ctx *c); > +int repair_set(struct ctx *c, int s, int cmd); > + > +#endif /* REPAIR_H */ > diff --git a/tap.c b/tap.c > index 8c92d23..d0673e5 100644 > --- a/tap.c > +++ b/tap.c > @@ -56,6 +56,7 @@ > #include "netlink.h" > #include "pasta.h" > #include "packet.h" > +#include "repair.h" > #include "tap.h" > #include "log.h" > #include "vhost_user.h" > @@ -1151,68 +1152,6 @@ void tap_handler_pasta(struct ctx *c, uint32_t events, > tap_pasta_input(c, now); > } > > -/** > - * tap_sock_unix_open() - Create and bind AF_UNIX socket > - * @sock_path: Socket path. If empty, set on return (UNIX_SOCK_PATH as prefix) > - * > - * Return: socket descriptor on success, won't return on failure > - */ > -int tap_sock_unix_open(char *sock_path) > -{ > - int fd = socket(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0); > - struct sockaddr_un addr = { > - .sun_family = AF_UNIX, > - }; > - int i; > - > - if (fd < 0) > - die_perror("Failed to open UNIX domain socket"); > - > - for (i = 1; i < UNIX_SOCK_MAX; i++) { > - char *path = addr.sun_path; > - int ex, ret; > - > - if (*sock_path) > - memcpy(path, sock_path, UNIX_PATH_MAX); > - else if (snprintf_check(path, UNIX_PATH_MAX - 1, > - UNIX_SOCK_PATH, i)) > - die_perror("Can't build UNIX domain socket path"); > - > - ex = socket(AF_UNIX, SOCK_STREAM | SOCK_NONBLOCK | SOCK_CLOEXEC, > - 0); > - if (ex < 0) > - die_perror("Failed to check for UNIX domain conflicts"); > - > - ret = connect(ex, (const struct sockaddr *)&addr, sizeof(addr)); > - if (!ret || (errno != ENOENT && errno != ECONNREFUSED && > - errno != EACCES)) { > - if (*sock_path) > - die("Socket path %s already in use", path); > - > - close(ex); > - continue; > - 
} > - close(ex); > - > - unlink(path); > - ret = bind(fd, (const struct sockaddr *)&addr, sizeof(addr)); > - if (*sock_path && ret) > - die_perror("Failed to bind UNIX domain socket"); > - > - if (!ret) > - break; > - } > - > - if (i == UNIX_SOCK_MAX) > - die_perror("Failed to bind UNIX domain socket"); > - > - info("UNIX domain socket bound at %s", addr.sun_path); > - if (!*sock_path) > - memcpy(sock_path, addr.sun_path, UNIX_PATH_MAX); > - > - return fd; > -} > - > /** > * tap_backend_show_hints() - Give help information to start QEMU > * @c: Execution context > @@ -1423,6 +1362,8 @@ void tap_backend_init(struct ctx *c) > tap_sock_tun_init(c); > break; > case MODE_VU: > + repair_sock_init(c); > + /* fall through */ > case MODE_PASST: > tap_sock_unix_init(c); > > diff --git a/util.c b/util.c > index 4d51e04..c3c5480 100644 > --- a/util.c > +++ b/util.c > @@ -178,6 +178,68 @@ int sock_l4_sa(const struct ctx *c, enum epoll_type type, > return fd; > } > > +/** > + * sock_unix() - Create and bind AF_UNIX socket > + * @sock_path: Socket path. 
If empty, set on return (UNIX_SOCK_PATH as prefix) > + * > + * Return: socket descriptor on success, won't return on failure > + */ > +int sock_unix(char *sock_path) > +{ > + int fd = socket(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0); > + struct sockaddr_un addr = { > + .sun_family = AF_UNIX, > + }; > + int i; > + > + if (fd < 0) > + die_perror("Failed to open UNIX domain socket"); > + > + for (i = 1; i < UNIX_SOCK_MAX; i++) { > + char *path = addr.sun_path; > + int ex, ret; > + > + if (*sock_path) > + memcpy(path, sock_path, UNIX_PATH_MAX); > + else if (snprintf_check(path, UNIX_PATH_MAX - 1, > + UNIX_SOCK_PATH, i)) > + die_perror("Can't build UNIX domain socket path"); > + > + ex = socket(AF_UNIX, SOCK_STREAM | SOCK_NONBLOCK | SOCK_CLOEXEC, > + 0); > + if (ex < 0) > + die_perror("Failed to check for UNIX domain conflicts"); > + > + ret = connect(ex, (const struct sockaddr *)&addr, sizeof(addr)); > + if (!ret || (errno != ENOENT && errno != ECONNREFUSED && > + errno != EACCES)) { > + if (*sock_path) > + die("Socket path %s already in use", path); > + > + close(ex); > + continue; > + } > + close(ex); > + > + unlink(path); > + ret = bind(fd, (const struct sockaddr *)&addr, sizeof(addr)); > + if (*sock_path && ret) > + die_perror("Failed to bind UNIX domain socket"); > + > + if (!ret) > + break; > + } > + > + if (i == UNIX_SOCK_MAX) > + die_perror("Failed to bind UNIX domain socket"); > + > + info("UNIX domain socket bound at %s", addr.sun_path); > + if (!*sock_path) > + memcpy(sock_path, addr.sun_path, UNIX_PATH_MAX); > + > + return fd; > +} > + > /** > * sock_probe_mem() - Check if setting high SO_SNDBUF and SO_RCVBUF is allowed > * @c: Execution context > diff --git a/util.h b/util.h > index 255eb26..3dacb4d 100644 > --- a/util.h > +++ b/util.h > @@ -214,6 +214,7 @@ struct ctx; > int sock_l4_sa(const struct ctx *c, enum epoll_type type, > const void *sa, socklen_t sl, > const char *ifname, bool v6only, uint32_t data); > +int sock_unix(char *sock_path); > void 
sock_probe_mem(struct ctx *c); > long timespec_diff_ms(const struct timespec *a, const struct timespec *b); > int64_t timespec_diff_us(const struct timespec *a, const struct timespec *b); -- David Gibson (he or they) | I'll have my music baroque, and my code david AT gibson.dropbear.id.au | minimalist, thank you, not the other way | around. http://www.ozlabs.org/~dgibson