From: Stefano Brivio <sbrivio@redhat.com>
To: passt-dev@passt.top
Cc: Laurent Vivier <lvivier@redhat.com>,
David Gibson <david@gibson.dropbear.id.au>
Subject: [PATCH v5 1/6] Introduce facilities for guest migration on top of vhost-user infrastructure
Date: Wed, 5 Feb 2025 01:38:59 +0100 [thread overview]
Message-ID: <20250205003904.2797491-2-sbrivio@redhat.com> (raw)
In-Reply-To: <20250205003904.2797491-1-sbrivio@redhat.com>
Add migration facilities on top of the current vhost-user
infrastructure, moving the state transfer logic out of vu_migrate()
and into migrate.c.
Versioned migration stages define function pointers to be called on
source or target, or data sections that need to be transferred.
The migration header consists of a magic number and a version
identifier.
Co-authored-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
---
Makefile | 12 +--
migrate.c | 210 ++++++++++++++++++++++++++++++++++++++++++++++++++++
migrate.h | 51 +++++++++++++
passt.c | 2 +-
util.h | 26 +++++++
vu_common.c | 58 +++++----------
vu_common.h | 2 +-
7 files changed, 315 insertions(+), 46 deletions(-)
create mode 100644 migrate.c
create mode 100644 migrate.h
diff --git a/Makefile b/Makefile
index d3d4b78..be89b07 100644
--- a/Makefile
+++ b/Makefile
@@ -38,8 +38,8 @@ FLAGS += -DDUAL_STACK_SOCKETS=$(DUAL_STACK_SOCKETS)
PASST_SRCS = arch.c arp.c checksum.c conf.c dhcp.c dhcpv6.c flow.c fwd.c \
icmp.c igmp.c inany.c iov.c ip.c isolation.c lineread.c log.c mld.c \
- ndp.c netlink.c packet.c passt.c pasta.c pcap.c pif.c tap.c tcp.c \
- tcp_buf.c tcp_splice.c tcp_vu.c udp.c udp_flow.c udp_vu.c util.c \
+ ndp.c netlink.c migrate.c packet.c passt.c pasta.c pcap.c pif.c tap.c \
+ tcp.c tcp_buf.c tcp_splice.c tcp_vu.c udp.c udp_flow.c udp_vu.c util.c \
vhost_user.c virtio.c vu_common.c
QRAP_SRCS = qrap.c
PASST_REPAIR_SRCS = passt-repair.c
@@ -49,10 +49,10 @@ MANPAGES = passt.1 pasta.1 qrap.1 passt-repair.1
PASST_HEADERS = arch.h arp.h checksum.h conf.h dhcp.h dhcpv6.h flow.h fwd.h \
flow_table.h icmp.h icmp_flow.h inany.h iov.h ip.h isolation.h \
- lineread.h log.h ndp.h netlink.h packet.h passt.h pasta.h pcap.h pif.h \
- siphash.h tap.h tcp.h tcp_buf.h tcp_conn.h tcp_internal.h tcp_splice.h \
- tcp_vu.h udp.h udp_flow.h udp_internal.h udp_vu.h util.h vhost_user.h \
- virtio.h vu_common.h
+ lineread.h log.h migrate.h ndp.h netlink.h packet.h passt.h pasta.h \
+ pcap.h pif.h siphash.h tap.h tcp.h tcp_buf.h tcp_conn.h tcp_internal.h \
+ tcp_splice.h tcp_vu.h udp.h udp_flow.h udp_internal.h udp_vu.h util.h \
+ vhost_user.h virtio.h vu_common.h
HEADERS = $(PASST_HEADERS) seccomp.h
C := \#include <sys/random.h>\nint main(){int a=getrandom(0, 0, 0);}
diff --git a/migrate.c b/migrate.c
new file mode 100644
index 0000000..a7031f9
--- /dev/null
+++ b/migrate.c
@@ -0,0 +1,210 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+/* PASST - Plug A Simple Socket Transport
+ * for qemu/UNIX domain socket mode
+ *
+ * PASTA - Pack A Subtle Tap Abstraction
+ * for network namespace/tap device mode
+ *
+ * migrate.c - Migration sections, layout, and routines
+ *
+ * Copyright (c) 2025 Red Hat GmbH
+ * Author: Stefano Brivio <sbrivio@redhat.com>
+ */
+
+#include <errno.h>
+#include <sys/uio.h>
+
+#include "util.h"
+#include "ip.h"
+#include "passt.h"
+#include "inany.h"
+#include "flow.h"
+#include "flow_table.h"
+
+#include "migrate.h"
+
+/* Current version of migration data */
+#define MIGRATE_VERSION 1
+
+/* Magic identifier for migration data */
+#define MIGRATE_MAGIC 0xB1BB1D1B0BB1D1B0
+
+/* Migration header to send from source */
+static struct migrate_header header = {
+ .magic = htonll_constant(MIGRATE_MAGIC),
+ .version = htonl_constant(MIGRATE_VERSION),
+};
+
+/**
+ * migrate_send_block() - Migration stage handler to send verbatim data
+ * @c:		Execution context, unused
+ * @stage:	Migration stage, its @iov describes the buffer to send
+ * @fd:		Migration fd
+ *
+ * Return: 0 on success, errno value if writing to @fd failed
+ */
+__attribute__((__unused__))
+static int migrate_send_block(struct ctx *c,
+			      const struct migrate_stage *stage, int fd)
+{
+	(void)c;
+
+	if (write_remainder(fd, &stage->iov, 1, 0) >= 0)
+		return 0;
+
+	return errno;
+}
+
+/**
+ * migrate_recv_block() - Migration stage handler to receive verbatim data
+ * @c:		Execution context, unused
+ * @stage:	Migration stage, its @iov describes the buffer to fill
+ * @fd:		Migration fd
+ *
+ * Return: 0 on success, errno value if reading from @fd failed
+ *
+ * #syscalls:vu readv
+ */
+__attribute__((__unused__))
+static int migrate_recv_block(struct ctx *c,
+			      const struct migrate_stage *stage, int fd)
+{
+	(void)c;
+
+	if (read_remainder(fd, &stage->iov, 1, 0) >= 0)
+		return 0;
+
+	return errno;
+}
+
+#define DATA_STAGE(v) \
+ { \
+ .name = #v, \
+ .source = migrate_send_block, \
+ .target = migrate_recv_block, \
+ .iov = { &(v), sizeof(v) }, \
+ }
+
+/* Stages for version 1, in order, ended by an all-zero entry (NULL name) */
+static const struct migrate_stage stages_v1[] = {
+ {
+ .name = "flow pre",
+ .target = NULL,
+ },
+ {
+ .name = "flow post",
+ .source = NULL,
+ },
+ { 0 },
+};
+
+/* Set of data versions, ended by an all-zero entry (id 0, no stages) */
+static const struct migrate_version versions[] = {
+ {
+ 1, stages_v1,
+ },
+ { 0 },
+};
+
+/**
+ * migrate_source() - Migration as source, send state to hypervisor
+ * @c:		Execution context
+ * @fd:		File descriptor for state transfer
+ *
+ * Return: 0 on success, positive error code on failure
+ */
+int migrate_source(struct ctx *c, int fd)
+{
+	/* Latest version: last entry before the all-zero terminator */
+	const struct migrate_version *v = versions + ARRAY_SIZE(versions) - 2;
+	const struct migrate_stage *s;
+	int ret;
+
+	if (write_all_buf(fd, &header, sizeof(header))) {
+		ret = errno;
+		err("Can't send migration header: %s, abort", strerror_(ret));
+		return ret;
+	}
+
+	for (s = v->s; s->name; s++) {
+		if (!s->source)
+			continue;
+
+		debug("Source side migration: %s", s->name);
+		if ((ret = s->source(c, s, fd))) {
+			err("Source migration stage %s: %s, abort", s->name,
+			    strerror_(ret));
+			return ret;
+		}
+	}
+
+	return 0;
+}
+
+/**
+ * migrate_target_read_header() - Read and check migration header on target
+ * @fd:		Descriptor for state transfer
+ *
+ * Return: version number on success, 0 on failure with errno set
+ */
+static uint32_t migrate_target_read_header(int fd)
+{
+	struct migrate_header h;
+	uint32_t version;
+
+	if (read_all_buf(fd, &h, sizeof(h)))
+		return 0;
+
+	version = ntohl(h.version);
+	debug("Source magic: 0x%016" PRIx64 ", version: %u",
+	      be64toh(h.magic), version);
+	if (be64toh(h.magic) == MIGRATE_MAGIC && version)
+		return version;
+
+	errno = EINVAL;
+	return 0;
+}
+
+/**
+ * migrate_target() - Migration as target, receive state from hypervisor
+ * @c:		Execution context
+ * @fd:		File descriptor for state transfer
+ *
+ * Return: 0 on success, positive error code on failure
+ */
+int migrate_target(struct ctx *c, int fd)
+{
+	const struct migrate_version *v;
+	const struct migrate_stage *s;
+	uint32_t id;
+	int ret;
+
+	id = migrate_target_read_header(fd);
+	if (!id) {
+		ret = errno;
+		err("Migration header check failed: %s, abort", strerror_(ret));
+		return ret;
+	}
+
+	/* Stop at the entry matching @id, or at the all-zero terminator */
+	for (v = versions; v->id && v->id != id; v++);
+	if (!v->id) {
+		err("Unsupported version: %u", id);
+		return ENOTSUP;
+	}
+
+	for (s = v->s; s->name; s++) {
+		if (!s->target)
+			continue;
+
+		debug("Target side migration: %s", s->name);
+		if ((ret = s->target(c, s, fd))) {
+			err("Target migration stage %s: %s, abort", s->name,
+			    strerror_(ret));
+			return ret;
+		}
+	}
+
+	return 0;
+}
diff --git a/migrate.h b/migrate.h
new file mode 100644
index 0000000..3093b6e
--- /dev/null
+++ b/migrate.h
@@ -0,0 +1,51 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later
+ * Copyright (c) 2025 Red Hat GmbH
+ * Author: Stefano Brivio <sbrivio@redhat.com>
+ */
+
+#ifndef MIGRATE_H
+#define MIGRATE_H
+
+/**
+ * struct migrate_header - Migration header from source
+ * @magic: 0xB1BB1D1B0BB1D1B0, network order
+ * @version: Highest known, target aborts if too old, network order
+ */
+struct migrate_header {
+ uint64_t magic;
+ uint32_t version;
+} __attribute__((packed));
+
+/**
+ * struct migrate_stage - Callbacks and parameters for one stage of migration
+ * @name: Stage name (for debugging), NULL in the entry ending a stage array
+ * @source: Callback to implement this stage on the source, NULL to skip it
+ * @target: Callback to implement this stage on the target, NULL to skip it
+ * @iov: Optional data section to transfer
+ */
+struct migrate_stage {
+ const char *name;
+ int (*source)(struct ctx *c,
+ const struct migrate_stage *stage, int fd);
+ int (*target)(struct ctx *c,
+ const struct migrate_stage *stage, int fd);
+
+ /* FIXME: rollback callbacks? */
+
+ struct iovec iov;
+};
+
+/**
+ * struct migrate_version - Stages for a particular protocol version
+ * @id: Version number, host order, 0 in the entry ending the versions array
+ * @s: Ordered array of stages, ended by an entry with NULL @name
+ */
+struct migrate_version {
+ uint32_t id;
+ const struct migrate_stage *s;
+};
+
+int migrate_source(struct ctx *c, int fd);
+int migrate_target(struct ctx *c, int fd);
+
+#endif /* MIGRATE_H */
diff --git a/passt.c b/passt.c
index b1c8ab6..184d4e5 100644
--- a/passt.c
+++ b/passt.c
@@ -358,7 +358,7 @@ loop:
vu_kick_cb(c.vdev, ref, &now);
break;
case EPOLL_TYPE_VHOST_MIGRATION:
- vu_migrate(c.vdev, eventmask);
+ vu_migrate(&c, eventmask);
break;
default:
/* Can't happen */
diff --git a/util.h b/util.h
index 23b165c..1aed629 100644
--- a/util.h
+++ b/util.h
@@ -122,12 +122,38 @@
(((x) & 0x0000ff00) << 8) | (((x) & 0x000000ff) << 24))
#endif
+#ifndef __bswap_constant_32
+#define __bswap_constant_32(x) \
+ ((((x) & 0xff000000) >> 24) | (((x) & 0x00ff0000) >> 8) | \
+ (((x) & 0x0000ff00) << 8) | (((x) & 0x000000ff) << 24))
+#endif
+
+#ifndef __bswap_constant_64
+#define __bswap_constant_64(x) \
+ ((((x) & 0xff00000000000000ULL) >> 56) | \
+ (((x) & 0x00ff000000000000ULL) >> 40) | \
+ (((x) & 0x0000ff0000000000ULL) >> 24) | \
+ (((x) & 0x000000ff00000000ULL) >> 8) | \
+ (((x) & 0x00000000ff000000ULL) << 8) | \
+ (((x) & 0x0000000000ff0000ULL) << 24) | \
+ (((x) & 0x000000000000ff00ULL) << 40) | \
+ (((x) & 0x00000000000000ffULL) << 56))
+#endif
+
#if __BYTE_ORDER == __BIG_ENDIAN
#define htons_constant(x) (x)
#define htonl_constant(x) (x)
+#define htonll_constant(x) (x)
+#define ntohs_constant(x) (x)
+#define ntohl_constant(x) (x)
+#define ntohll_constant(x) (x)
#else
#define htons_constant(x) (__bswap_constant_16(x))
#define htonl_constant(x) (__bswap_constant_32(x))
+#define htonll_constant(x) (__bswap_constant_64(x))
+#define ntohs_constant(x) (__bswap_constant_16(x))
+#define ntohl_constant(x) (__bswap_constant_32(x))
+#define ntohll_constant(x) (__bswap_constant_64(x))
#endif
/**
diff --git a/vu_common.c b/vu_common.c
index ab04d31..3d41824 100644
--- a/vu_common.c
+++ b/vu_common.c
@@ -5,6 +5,7 @@
* common_vu.c - vhost-user common UDP and TCP functions
*/
+#include <errno.h>
#include <unistd.h>
#include <sys/uio.h>
#include <sys/eventfd.h>
@@ -17,6 +18,7 @@
#include "vhost_user.h"
#include "pcap.h"
#include "vu_common.h"
+#include "migrate.h"
#define VU_MAX_TX_BUFFER_NB 2
@@ -305,48 +307,28 @@ err:
}
/**
- * vu_migrate() - Send/receive passt insternal state to/from QEMU
- * @vdev: vhost-user device
+ * vu_migrate() - Send/receive passt internal state to/from QEMU
+ * @c: Execution context
* @events: epoll events
*/
-void vu_migrate(struct vu_dev *vdev, uint32_t events)
+void vu_migrate(struct ctx *c, uint32_t events)
{
- int ret;
+ struct vu_dev *vdev = c->vdev;
+ int rc = EIO;
- /* TODO: collect/set passt internal state
- * and use vdev->device_state_fd to send/receive it
- */
debug("vu_migrate fd %d events %x", vdev->device_state_fd, events);
- if (events & EPOLLOUT) {
- debug("Saving backend state");
-
- /* send some stuff */
- ret = write(vdev->device_state_fd, "PASST", 6);
- /* value to be returned by VHOST_USER_CHECK_DEVICE_STATE */
- vdev->device_state_result = ret == -1 ? -1 : 0;
- /* Closing the file descriptor signals the end of transfer */
- epoll_del(vdev->context, vdev->device_state_fd);
- close(vdev->device_state_fd);
- vdev->device_state_fd = -1;
- } else if (events & EPOLLIN) {
- char buf[6];
-
- debug("Loading backend state");
- /* read some stuff */
- ret = read(vdev->device_state_fd, buf, sizeof(buf));
- /* value to be returned by VHOST_USER_CHECK_DEVICE_STATE */
- if (ret != sizeof(buf)) {
- vdev->device_state_result = -1;
- } else {
- ret = strncmp(buf, "PASST", sizeof(buf));
- vdev->device_state_result = ret == 0 ? 0 : -1;
- }
- } else if (events & EPOLLHUP) {
- debug("Closing migration channel");
- /* The end of file signals the end of the transfer. */
- epoll_del(vdev->context, vdev->device_state_fd);
- close(vdev->device_state_fd);
- vdev->device_state_fd = -1;
- }
+ if (events & EPOLLOUT)
+ rc = migrate_source(c, vdev->device_state_fd);
+ else if (events & EPOLLIN)
+ rc = migrate_target(c, vdev->device_state_fd);
+
+ /* EPOLLHUP without EPOLLIN/EPOLLOUT, or EPOLLERR? Migration failed */
+
+ vdev->device_state_result = rc;
+
+ epoll_ctl(c->epollfd, EPOLL_CTL_DEL, vdev->device_state_fd, NULL);
+ debug("Closing migration channel");
+ close(vdev->device_state_fd);
+ vdev->device_state_fd = -1;
}
diff --git a/vu_common.h b/vu_common.h
index d56c021..69c4006 100644
--- a/vu_common.h
+++ b/vu_common.h
@@ -57,5 +57,5 @@ void vu_flush(const struct vu_dev *vdev, struct vu_virtq *vq,
void vu_kick_cb(struct vu_dev *vdev, union epoll_ref ref,
const struct timespec *now);
int vu_send_single(const struct ctx *c, const void *buf, size_t size);
-void vu_migrate(struct vu_dev *vdev, uint32_t events);
+void vu_migrate(struct ctx *c, uint32_t events);
#endif /* VU_COMMON_H */
--
@@ -57,5 +57,5 @@ void vu_flush(const struct vu_dev *vdev, struct vu_virtq *vq,
void vu_kick_cb(struct vu_dev *vdev, union epoll_ref ref,
const struct timespec *now);
int vu_send_single(const struct ctx *c, const void *buf, size_t size);
-void vu_migrate(struct vu_dev *vdev, uint32_t events);
+void vu_migrate(struct ctx *c, uint32_t events);
#endif /* VU_COMMON_H */
--
2.43.0
next prev parent reply other threads:[~2025-02-05 0:39 UTC|newest]
Thread overview: 14+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-02-05 0:38 [PATCH v5 0/6] Draft, incomplete series introducing state migration Stefano Brivio
2025-02-05 0:38 ` Stefano Brivio [this message]
2025-02-05 1:44 ` [PATCH v5 1/6] Introduce facilities for guest migration on top of vhost-user infrastructure David Gibson
2025-02-05 0:39 ` [PATCH v5 2/6] migrate: Make more handling common rather than vhost-user specific Stefano Brivio
2025-02-05 0:39 ` [PATCH v5 3/6] migrate: Don't handle the migration channel through epoll Stefano Brivio
2025-02-05 0:39 ` [PATCH v5 4/6] Add interfaces and configuration bits for passt-repair Stefano Brivio
2025-02-05 0:39 ` [PATCH v5 5/6] vhost_user: Make source quit after reporting migration state Stefano Brivio
2025-02-05 2:09 ` David Gibson
2025-02-05 5:47 ` Stefano Brivio
2025-02-05 8:58 ` Hanna Czenczek
2025-02-05 10:19 ` Stefano Brivio
2025-02-05 11:39 ` David Gibson
2025-02-05 0:39 ` [PATCH v5 6/6] Implement source and target sides of migration Stefano Brivio
2025-02-05 1:10 ` David Gibson
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20250205003904.2797491-2-sbrivio@redhat.com \
--to=sbrivio@redhat.com \
--cc=david@gibson.dropbear.id.au \
--cc=lvivier@redhat.com \
--cc=passt-dev@passt.top \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
Code repositories for project(s) associated with this public inbox
https://passt.top/passt
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for IMAP folder(s).