* [PATCH] flow, repair: Proper error handling for missing passt-repair helper on target
@ 2025-06-04 16:37 Stefano Brivio
0 siblings, 0 replies; only message in thread
From: Stefano Brivio @ 2025-06-04 16:37 UTC (permalink / raw)
To: passt-dev
If the target doesn't have a connected passt-repair helper,
repair_wait() sets a timeout and forcibly calls accept4(), but if the
timeout expires, we don't report any error and proceed as if we had a
connected helper.
Later, in repair_flush(), sendmsg() will fail for each flow we try to
migrate with EBADF, and we'll handle that gracefully, but it makes no
sense to even try, and it also leads to noise in the logs.
Similarly to how we handle a missing repair helper in
flow_migrate_repair_all(), propagate timeout and other errors through
repair_wait(), and don't attempt to migrate flows if repair_wait()
failed.
Fixes: c8b520c0625b ("flow, repair: Wait for a short while for passt-repair to connect")
Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
---
flow.c | 3 ++-
repair.c | 38 ++++++++++++++++++++++++++++----------
repair.h | 4 ++--
3 files changed, 32 insertions(+), 13 deletions(-)
diff --git a/flow.c b/flow.c
index cc05033..da5c813 100644
--- a/flow.c
+++ b/flow.c
@@ -1130,7 +1130,8 @@ int flow_migrate_target(struct ctx *c, const struct migrate_stage *stage,
if (!count)
return 0;
- repair_wait(c);
+ if ((rc = repair_wait(c)))
+ return -rc;
if ((rc = flow_migrate_repair_all(c, true)))
return -rc;
diff --git a/repair.c b/repair.c
index 149fe51..f6b1bf3 100644
--- a/repair.c
+++ b/repair.c
@@ -68,18 +68,21 @@ void repair_sock_init(const struct ctx *c)
* repair_listen_handler() - Handle events on TCP_REPAIR helper listening socket
* @c: Execution context
* @events: epoll events
+ *
+ * Return: 0 on valid event with new connected socket, error code on failure
*/
-void repair_listen_handler(struct ctx *c, uint32_t events)
+int repair_listen_handler(struct ctx *c, uint32_t events)
{
union epoll_ref ref = { .type = EPOLL_TYPE_REPAIR };
struct epoll_event ev = { 0 };
struct ucred ucred;
socklen_t len;
+ int rc;
if (events != EPOLLIN) {
debug("Spurious event 0x%04x on TCP_REPAIR helper socket",
events);
- return;
+ return EINVAL;
}
len = sizeof(ucred);
@@ -90,18 +93,19 @@ void repair_listen_handler(struct ctx *c, uint32_t events)
SOCK_NONBLOCK);
if (discard == -1)
- return;
+ return errno;
if (!getsockopt(discard, SOL_SOCKET, SO_PEERCRED, &ucred, &len))
info("Discarding TCP_REPAIR helper, PID %i", ucred.pid);
close(discard);
- return;
+ return EEXIST;
}
if ((c->fd_repair = accept4(c->fd_repair_listen, NULL, NULL, 0)) < 0) {
+ rc = errno;
debug_perror("accept4() on TCP_REPAIR helper listening socket");
- return;
+ return rc;
}
if (!getsockopt(c->fd_repair, SOL_SOCKET, SO_PEERCRED, &ucred, &len))
@@ -111,10 +115,14 @@ void repair_listen_handler(struct ctx *c, uint32_t events)
ev.events = EPOLLHUP | EPOLLET;
ev.data.u64 = ref.u64;
if (epoll_ctl(c->epollfd, EPOLL_CTL_ADD, c->fd_repair, &ev)) {
+ rc = errno;
debug_perror("epoll_ctl() on TCP_REPAIR helper socket");
close(c->fd_repair);
c->fd_repair = -1;
+ return rc;
}
+
+ return 0;
}
/**
@@ -145,29 +153,39 @@ void repair_handler(struct ctx *c, uint32_t events)
/**
* repair_wait() - Wait (with timeout) for TCP_REPAIR helper to connect
* @c: Execution context
+ *
+ * Return: 0 on success or if already connected, error code on failure
*/
-void repair_wait(struct ctx *c)
+int repair_wait(struct ctx *c)
{
struct timeval tv = { .tv_sec = 0,
.tv_usec = (long)(REPAIR_ACCEPT_TIMEOUT_US) };
+ int rc;
+
static_assert(REPAIR_ACCEPT_TIMEOUT_US < 1000 * 1000,
".tv_usec is greater than 1000 * 1000");
- if (c->fd_repair >= 0 || c->fd_repair_listen == -1)
- return;
+ if (c->fd_repair >= 0)
+ return 0;
+
+ if (c->fd_repair_listen == -1)
+ return ENOENT;
if (setsockopt(c->fd_repair_listen, SOL_SOCKET, SO_RCVTIMEO,
&tv, sizeof(tv))) {
+ rc = errno;
err_perror("Set timeout on TCP_REPAIR listening socket");
- return;
+ return rc;
}
- repair_listen_handler(c, EPOLLIN);
+ rc = repair_listen_handler(c, EPOLLIN);
tv.tv_usec = 0;
if (setsockopt(c->fd_repair_listen, SOL_SOCKET, SO_RCVTIMEO,
&tv, sizeof(tv)))
err_perror("Clear timeout on TCP_REPAIR listening socket");
+
+ return rc;
}
/**
diff --git a/repair.h b/repair.h
index 1d37922..ab27e67 100644
--- a/repair.h
+++ b/repair.h
@@ -7,10 +7,10 @@
#define REPAIR_H
void repair_sock_init(const struct ctx *c);
-void repair_listen_handler(struct ctx *c, uint32_t events);
+int repair_listen_handler(struct ctx *c, uint32_t events);
void repair_handler(struct ctx *c, uint32_t events);
void repair_close(struct ctx *c);
-void repair_wait(struct ctx *c);
+int repair_wait(struct ctx *c);
int repair_flush(struct ctx *c);
int repair_set(struct ctx *c, int s, int cmd);
--
@@ -7,10 +7,10 @@
#define REPAIR_H
void repair_sock_init(const struct ctx *c);
-void repair_listen_handler(struct ctx *c, uint32_t events);
+int repair_listen_handler(struct ctx *c, uint32_t events);
void repair_handler(struct ctx *c, uint32_t events);
void repair_close(struct ctx *c);
-void repair_wait(struct ctx *c);
+int repair_wait(struct ctx *c);
int repair_flush(struct ctx *c);
int repair_set(struct ctx *c, int s, int cmd);
--
2.43.0
^ permalink raw reply related [flat|nested] only message in thread
only message in thread, other threads:[~2025-06-04 16:37 UTC | newest]
Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2025-06-04 16:37 [PATCH] flow, repair: Proper error handling for missing passt-repair helper on target Stefano Brivio
Code repositories for project(s) associated with this public inbox
https://passt.top/passt
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for IMAP folder(s).