From: David Gibson <david@gibson.dropbear.id.au>
To: Stefano Brivio <sbrivio@redhat.com>, passt-dev@passt.top
Cc: David Gibson <david@gibson.dropbear.id.au>
Subject: [PATCH v24 3/5] flow, migrate: Flow migration skeleton
Date: Sat, 15 Feb 2025 00:08:43 +1100 [thread overview]
Message-ID: <20250214130845.3475757-4-david@gibson.dropbear.id.au> (raw)
In-Reply-To: <20250214130845.3475757-1-david@gibson.dropbear.id.au>
Implement an outline of how to transfer the state of individual flows
during migration. We split the handling into three stages:
1) freeze
This selects which flows are eligible to be migrated, marking them with a
new flow state 'MIGRATING'. It makes any preparations for the flow to be
migrated, but writes no data to the state stream.
If migration fails, we roll back by executing step (3) on the source
instead of on the target.
2) transfer
This actually transfers data about the flows. Flows are created on the
target in MIGRATING state.
3) thaw
This "activates" the transferred flows, letting them resume normal
operation, but reads no additional data from the state stream. If this
fails for a flow, generally that flow is terminated, but we otherwise
carry on.
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
---
flow.c | 186 +++++++++++++++++++++++++++++++++++++++++++++++++--
flow.h | 5 ++
flow_table.h | 22 ++++++
migrate.c | 39 +++++++++--
migrate.h | 2 +
5 files changed, 240 insertions(+), 14 deletions(-)
diff --git a/flow.c b/flow.c
index d9b888ce..90bff884 100644
--- a/flow.c
+++ b/flow.c
@@ -27,6 +27,7 @@ const char *flow_state_str[] = {
[FLOW_STATE_TGT] = "TGT",
[FLOW_STATE_TYPED] = "TYPED",
[FLOW_STATE_ACTIVE] = "ACTIVE",
+ [FLOW_STATE_MIGRATING] = "MIGRATING",
};
static_assert(ARRAY_SIZE(flow_state_str) == FLOW_NUM_STATES,
"flow_state_str[] doesn't match enum flow_state");
@@ -806,19 +807,18 @@ void flow_defer_handler(const struct ctx *c, const struct timespec *now)
continue;
}
- case FLOW_STATE_NEW:
- case FLOW_STATE_INI:
- case FLOW_STATE_TGT:
- case FLOW_STATE_TYPED:
- /* Incomplete flow at end of cycle */
- ASSERT(false);
- break;
+ case FLOW_STATE_MIGRATING:
+ /* This can occur between the SET_DEVICE_STATE_FD and
+ * CHECK_DEVICE_STATE vhost-user events, ignore.
+ */
+ continue;
case FLOW_STATE_ACTIVE:
/* Nothing to do */
break;
default:
+ /* Bad flow state at end of cycle */
ASSERT(false);
}
@@ -874,6 +874,178 @@ void flow_defer_handler(const struct ctx *c, const struct timespec *now)
*last_next = FLOW_MAX;
}
+/**
+ * flow_freeze() - Select and prepare flows for migration
+ * @c: Execution context, unused
+ * @stage: Migration stage information, unused
+ * @fd: Migration file descriptor, unused
+ *
+ * MUST NOT READ OR WRITE MIGRATION STREAM
+ *
+ * Return: 0 on success, positive error code on failure
+ */
+int flow_freeze(struct ctx *c, const struct migrate_stage *stage, int fd)
+{
+ union flow *flow;
+
+ (void)stage;
+ (void)fd;
+ (void)c;
+
+ flow_foreach(flow) {
+ /* rc == 0 : not a migration candidate
+ * rc > 0 : migration candidate
+ * rc < 0 : error (negated error code), fail migration
+ */
+ int rc;
+
+ switch (flow->f.type) {
+ default:
+ /* No flow type is handled yet: assume it doesn't migrate */
+ rc = 0;
+ }
+
+ if (rc < 0) {
+ /* FIXME: rollback, flows marked so far stay MIGRATING */
+ return -rc;
+ }
+ if (rc > 0)
+ flow_set_state(&flow->f, FLOW_STATE_MIGRATING);
+ }
+
+ return 0;
+}
+
+/**
+ * flow_thaw() - Resume flow operation after migration
+ * @c:		Execution context, unused
+ * @stage:	Migration stage information, unused
+ * @fd:		Migration file descriptor, unused
+ *
+ * MUST NOT READ OR WRITE MIGRATION STREAM
+ *
+ * Return: 0 (a flow that fails to resume is freed, not reported as an error)
+ */
+int flow_thaw(struct ctx *c, const struct migrate_stage *stage, int fd)
+{
+	struct flow_free_cluster *free_head = NULL;
+	unsigned *last_next = &flow_first_free;
+	union flow *flow;
+
+	(void)stage;
+	(void)fd;
+	(void)c;
+
+	/* FIXME: Share logic with flow_defer_handler to rebuild free list */
+	flow_foreach_slot(flow) {
+		int rc;
+
+		if (flow->f.state == FLOW_STATE_FREE) {
+			unsigned skip = flow->free.n;
+
+			/* First entry of a free cluster must have n >= 1 */
+			ASSERT(skip);
+
+			if (free_head) {
+				/* Merge into preceding free cluster */
+				free_head->n += flow->free.n;
+				flow->free.n = flow->free.next = 0;
+			} else {
+				/* New free cluster, add to chain */
+				free_head = &flow->free;
+				*last_next = FLOW_IDX(flow);
+				last_next = &free_head->next;
+			}
+
+			/* Skip remaining empty entries (skip was read above) */
+			flow += skip - 1;
+			continue;
+		}
+
+		if (flow->f.state == FLOW_STATE_ACTIVE) {
+			free_head = NULL; /* slot in use: ends any free cluster */
+			continue;
+		}
+
+		ASSERT(flow->f.state == FLOW_STATE_MIGRATING);
+
+		rc = 0;
+		switch (flow->f.type) {
+		default:
+			/* Bug. We marked a flow as migrating, but we don't
+			 * know how to resume it */
+			ASSERT(0);
+		}
+
+		if (rc == 0) {
+			/* Successfully resumed flow */
+			flow_set_state(&flow->f, FLOW_STATE_ACTIVE);
+			free_head = NULL;
+			continue;
+		}
+
+		flow_err(flow, "Failed to resume flow: %s",
+			 strerror_(-rc));
+
+		flow_set_state(&flow->f, FLOW_STATE_FREE);
+		memset(flow, 0, sizeof(*flow));
+
+		if (free_head) {
+			/* Add slot to current free cluster */
+			ASSERT(FLOW_IDX(flow) ==
+			       FLOW_IDX(free_head) + free_head->n);
+			free_head->n++;
+			flow->free.n = flow->free.next = 0;
+		} else {
+			/* Create new free cluster */
+			free_head = &flow->free;
+			free_head->n = 1;
+			*last_next = FLOW_IDX(flow);
+			last_next = &free_head->next;
+		}
+	}
+
+	*last_next = FLOW_MAX;
+
+	return 0;
+}
+
+/**
+ * flow_migrate_source() - Transfer migrating flows to device state stream
+ * @c: Execution context, unused for now
+ * @stage: Migration stage information, unused
+ * @fd: Migration file descriptor, unused for now
+ *
+ * Return: 0 on success, positive error code on failure (stub: always ENOTSUP)
+ */
+int flow_migrate_source(struct ctx *c, const struct migrate_stage *stage, int fd)
+{
+ (void)c;
+ (void)stage;
+ (void)fd;
+
+ /* FIXME: not implemented yet, nothing is written to the stream */
+ return ENOTSUP;
+}
+
+/**
+ * flow_migrate_target() - Build flows from device state stream
+ * @c: Execution context, unused for now
+ * @stage: Migration stage information, unused
+ * @fd: Migration file descriptor, unused for now
+ *
+ * Return: 0 on success, positive error code on failure (stub: always ENOTSUP)
+ */
+int flow_migrate_target(struct ctx *c, const struct migrate_stage *stage, int fd)
+{
+ (void)c;
+ (void)stage;
+ (void)fd;
+
+ /* FIXME: not implemented yet, nothing is read from the stream */
+ return ENOTSUP;
+}
+
/**
* flow_init() - Initialise flow related data structures
*/
diff --git a/flow.h b/flow.h
index 24ba3ef0..deb70eb1 100644
--- a/flow.h
+++ b/flow.h
@@ -82,6 +82,10 @@
* 'true' from flow type specific deferred or timer handler
* Caveats:
* - flow_alloc_cancel() may not be called on it
+ *
+ * MIGRATING - A flow in the process of live migration
+ * Caveats:
+ * - Must only exist during live migration process
*/
enum flow_state {
FLOW_STATE_FREE,
@@ -90,6 +94,7 @@ enum flow_state {
FLOW_STATE_TGT,
FLOW_STATE_TYPED,
FLOW_STATE_ACTIVE,
+ FLOW_STATE_MIGRATING,
FLOW_NUM_STATES,
};
diff --git a/flow_table.h b/flow_table.h
index 6ff6d0f6..5ccf6644 100644
--- a/flow_table.h
+++ b/flow_table.h
@@ -89,11 +89,28 @@ static inline unsigned flow_idx(const struct flow_common *f)
flow_foreach_slot(flow_) \
if ((flow_)->f.state == FLOW_STATE_FREE) { \
(flow_) += (flow_)->free.n - 1; \
+ } else if ((flow)->f.state == FLOW_STATE_MIGRATING) { \
+ continue; \
} else if ((flow)->f.state != FLOW_STATE_ACTIVE) { \
flow_err((flow_), "BUG: Traversing non-active flow"); \
continue; \
} else
+/**
+ * flow_foreach_migrating() - step through migrating flows
+ * @flow_:	Points to each migrating flow in order
+ */
+#define flow_foreach_migrating(flow_)					\
+	flow_foreach_slot(flow_)					\
+		if ((flow_)->f.state == FLOW_STATE_FREE) {		\
+			(flow_) += (flow_)->free.n - 1;			\
+		} else if ((flow_)->f.state == FLOW_STATE_ACTIVE) {	\
+			continue;					\
+		} else if ((flow_)->f.state != FLOW_STATE_MIGRATING) {	\
+			flow_err((flow_), "BUG: Traversing non-active flow"); \
+			continue;					\
+		} else
+
/**
* flow_foreach_of_type() - step through active flows of one type
* @flow_: Points to each active flow in order
@@ -209,4 +226,9 @@ void flow_activate(struct flow_common *f);
#define FLOW_ACTIVATE(flow_) \
(flow_activate(&(flow_)->f))
+int flow_freeze(struct ctx *c, const struct migrate_stage *stage, int fd);
+int flow_thaw(struct ctx *c, const struct migrate_stage *stage, int fd);
+int flow_migrate_source(struct ctx *c, const struct migrate_stage *stage, int fd);
+int flow_migrate_target(struct ctx *c, const struct migrate_stage *stage, int fd);
+
#endif /* FLOW_TABLE_H */
diff --git a/migrate.c b/migrate.c
index 1c590160..d802e2f9 100644
--- a/migrate.c
+++ b/migrate.c
@@ -98,11 +98,27 @@ static int seen_addrs_target_v1(struct ctx *c,
/* Stages for version 1 */
static const struct migrate_stage stages_v1[] = {
+ {
+ .name = "freeze flows",
+ .source = flow_freeze,
+ .rollback = flow_thaw, /* run on the source if a later stage fails */
+ .target = NULL,
+ },
{
.name = "observed addresses",
.source = seen_addrs_source_v1,
.target = seen_addrs_target_v1,
},
+ {
+ .name = "transfer flows",
+ .source = flow_migrate_source,
+ .target = flow_migrate_target,
+ },
+ {
+ .name = "thaw flows",
+ .source = NULL,
+ .target = flow_thaw,
+ },
{ 0 },
};
@@ -145,14 +161,23 @@ static int migrate_source(struct ctx *c, int fd)
debug("Source side migration stage: %s", s->name);
- if ((ret = s->source(c, s, fd))) {
- err("Source migration stage: %s: %s, abort", s->name,
- strerror_(ret));
- return ret;
- }
+ if ((ret = s->source(c, s, fd)))
+ goto rollback;
}
return 0;
+
+rollback:
+ err("Source migration stage: %s: %s, aborting", s->name,
+ strerror_(ret));
+
+ while (s-- > v->s) {
+ debug("Rolling back migration: %s", s->name);
+ if (s->rollback && s->rollback(c, s, fd))
+ die("Unable to roll back migration");
+ }
+
+ return ret;
}
/**
@@ -214,9 +239,9 @@ static int migrate_target(struct ctx *c, int fd)
debug("Target side migration stage: %s", s->name);
if ((ret = s->target(c, s, fd))) {
- err("Target migration stage: %s: %s, abort", s->name,
+ /* FIXME: Less brutal failure */
+ die("Target migration stage: %s: %s, abort", s->name,
strerror_(ret));
- return ret;
}
}
diff --git a/migrate.h b/migrate.h
index 2c51cd97..eca65226 100644
--- a/migrate.h
+++ b/migrate.h
@@ -23,11 +23,13 @@ struct migrate_header {
* struct migrate_stage - Callbacks and parameters for one stage of migration
* @name: Stage name (for debugging)
* @source: Callback to implement this stage on the source
+ * @rollback: Callback to roll back this stage on the source
* @target: Callback to implement this stage on the target
*/
struct migrate_stage {
const char *name;
int (*source)(struct ctx *c, const struct migrate_stage *stage, int fd);
+ int (*rollback)(struct ctx *c, const struct migrate_stage *stage, int fd);
int (*target)(struct ctx *c, const struct migrate_stage *stage, int fd);
/* Add here separate rollback callbacks if needed */
--
@@ -23,11 +23,13 @@ struct migrate_header {
* struct migrate_stage - Callbacks and parameters for one stage of migration
* @name: Stage name (for debugging)
* @source: Callback to implement this stage on the source
+ * @rollback: Callback to roll back this stage on the source
* @target: Callback to implement this stage on the target
*/
struct migrate_stage {
const char *name;
int (*source)(struct ctx *c, const struct migrate_stage *stage, int fd);
+ int (*rollback)(struct ctx *c, const struct migrate_stage *stage, int fd);
int (*target)(struct ctx *c, const struct migrate_stage *stage, int fd);
/* Add here separate rollback callbacks if needed */
--
2.48.1
next prev parent reply other threads:[~2025-02-14 13:08 UTC|newest]
Thread overview: 7+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-02-14 13:08 [PATCH v24 0/5] State migration David Gibson
2025-02-14 13:08 ` [PATCH v24 1/5] vhost_user: Clear ring address on GET_VRING_BASE David Gibson
2025-02-15 5:15 ` Stefano Brivio
2025-02-14 13:08 ` [PATCH v24 2/5] flow: Flow table traversing macros David Gibson
2025-02-14 13:08 ` David Gibson [this message]
2025-02-14 13:08 ` [PATCH v24 4/5] migrate: Migrate TCP flows David Gibson
2025-02-14 13:08 ` [PATCH v24 5/5] test: Add migration tests David Gibson
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20250214130845.3475757-4-david@gibson.dropbear.id.au \
--to=david@gibson.dropbear.id.au \
--cc=passt-dev@passt.top \
--cc=sbrivio@redhat.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
Code repositories for project(s) associated with this public inbox
https://passt.top/passt
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for IMAP folder(s).