public inbox for passt-dev@passt.top
 help / color / mirror / code / Atom feed
From: Stefano Brivio <sbrivio@redhat.com>
To: passt-dev@passt.top
Cc: Laurent Vivier <lvivier@redhat.com>,
	David Gibson <david@gibson.dropbear.id.au>
Subject: [PATCH v5 1/6] Introduce facilities for guest migration on top of vhost-user infrastructure
Date: Wed,  5 Feb 2025 01:38:59 +0100	[thread overview]
Message-ID: <20250205003904.2797491-2-sbrivio@redhat.com> (raw)
In-Reply-To: <20250205003904.2797491-1-sbrivio@redhat.com>

Add migration facilities based on top of the current vhost-user
infrastructure, moving vu_migrate() to migrate.c.

Versioned migration stages define function pointers to be called on
source or target, or data sections that need to be transferred.

The migration header consists of a magic number and a version
identifier.

Co-authored-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
---
 Makefile    |  12 +--
 migrate.c   | 210 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 migrate.h   |  51 +++++++++++++
 passt.c     |   2 +-
 util.h      |  26 +++++++
 vu_common.c |  58 +++++----------
 vu_common.h |   2 +-
 7 files changed, 315 insertions(+), 46 deletions(-)
 create mode 100644 migrate.c
 create mode 100644 migrate.h

diff --git a/Makefile b/Makefile
index d3d4b78..be89b07 100644
--- a/Makefile
+++ b/Makefile
@@ -38,8 +38,8 @@ FLAGS += -DDUAL_STACK_SOCKETS=$(DUAL_STACK_SOCKETS)
 
 PASST_SRCS = arch.c arp.c checksum.c conf.c dhcp.c dhcpv6.c flow.c fwd.c \
 	icmp.c igmp.c inany.c iov.c ip.c isolation.c lineread.c log.c mld.c \
-	ndp.c netlink.c packet.c passt.c pasta.c pcap.c pif.c tap.c tcp.c \
-	tcp_buf.c tcp_splice.c tcp_vu.c udp.c udp_flow.c udp_vu.c util.c \
+	ndp.c netlink.c migrate.c packet.c passt.c pasta.c pcap.c pif.c tap.c \
+	tcp.c tcp_buf.c tcp_splice.c tcp_vu.c udp.c udp_flow.c udp_vu.c util.c \
 	vhost_user.c virtio.c vu_common.c
 QRAP_SRCS = qrap.c
 PASST_REPAIR_SRCS = passt-repair.c
@@ -49,10 +49,10 @@ MANPAGES = passt.1 pasta.1 qrap.1 passt-repair.1
 
 PASST_HEADERS = arch.h arp.h checksum.h conf.h dhcp.h dhcpv6.h flow.h fwd.h \
 	flow_table.h icmp.h icmp_flow.h inany.h iov.h ip.h isolation.h \
-	lineread.h log.h ndp.h netlink.h packet.h passt.h pasta.h pcap.h pif.h \
-	siphash.h tap.h tcp.h tcp_buf.h tcp_conn.h tcp_internal.h tcp_splice.h \
-	tcp_vu.h udp.h udp_flow.h udp_internal.h udp_vu.h util.h vhost_user.h \
-	virtio.h vu_common.h
+	lineread.h log.h migrate.h ndp.h netlink.h packet.h passt.h pasta.h \
+	pcap.h pif.h siphash.h tap.h tcp.h tcp_buf.h tcp_conn.h tcp_internal.h \
+	tcp_splice.h tcp_vu.h udp.h udp_flow.h udp_internal.h udp_vu.h util.h \
+	vhost_user.h virtio.h vu_common.h
 HEADERS = $(PASST_HEADERS) seccomp.h
 
 C := \#include <sys/random.h>\nint main(){int a=getrandom(0, 0, 0);}
diff --git a/migrate.c b/migrate.c
new file mode 100644
index 0000000..a7031f9
--- /dev/null
+++ b/migrate.c
@@ -0,0 +1,210 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+/* PASST - Plug A Simple Socket Transport
+ *  for qemu/UNIX domain socket mode
+ *
+ * PASTA - Pack A Subtle Tap Abstraction
+ *  for network namespace/tap device mode
+ *
+ * migrate.c - Migration sections, layout, and routines
+ *
+ * Copyright (c) 2025 Red Hat GmbH
+ * Author: Stefano Brivio <sbrivio@redhat.com>
+ */
+
+#include <errno.h>
+#include <sys/uio.h>
+
+#include "util.h"
+#include "ip.h"
+#include "passt.h"
+#include "inany.h"
+#include "flow.h"
+#include "flow_table.h"
+
+#include "migrate.h"
+
+/* Current version of migration data */
+#define MIGRATE_VERSION		1
+
+/* Magic identifier for migration data */
+#define MIGRATE_MAGIC		0xB1BB1D1B0BB1D1B0
+
+/* Migration header sent by the source, fields kept in network byte order */
+static struct migrate_header header = {
+	.magic		= htonll_constant(MIGRATE_MAGIC),
+	.version	= htonl_constant(MIGRATE_VERSION),
+};
+
+/**
+ * migrate_send_block() - Stage handler, send @stage->iov verbatim
+ * @c:		Execution context (unused)
+ * @stage:	Migration stage, providing the buffer to send
+ * @fd:		Migration fd
+ *
+ * Return: 0 on success, errno value if write_remainder() reports failure
+ */
+__attribute__((__unused__))
+static int migrate_send_block(struct ctx *c,
+			      const struct migrate_stage *stage, int fd)
+{
+	int failed = write_remainder(fd, &stage->iov, 1, 0) < 0;
+
+	(void)c;
+
+	/* Hand errno back as positive error code */
+	return failed ? errno : 0;
+}
+
+/**
+ * migrate_recv_block() - Stage handler, receive @stage->iov verbatim
+ * @c:		Execution context (unused)
+ * @stage:	Migration stage, providing the buffer to fill
+ * @fd:		Migration fd
+ *
+ * Return: 0 on success, errno value if read_remainder() reports failure
+ *
+ * #syscalls:vu readv
+ */
+__attribute__((__unused__))
+static int migrate_recv_block(struct ctx *c,
+			      const struct migrate_stage *stage, int fd)
+{
+	int failed = read_remainder(fd, &stage->iov, 1, 0) < 0;
+
+	(void)c;
+
+	/* Hand errno back as positive error code */
+	return failed ? errno : 0;
+}
+
+#define DATA_STAGE(v) /* stage sending/receiving object v verbatim */ \
+	{					\
+		.name = #v,			\
+		.source = migrate_send_block,	\
+		.target = migrate_recv_block,	\
+		.iov = { &(v), sizeof(v) },	\
+	}
+
+/* Stages for version 1, run in array order; an all-zero entry ends the list */
+static const struct migrate_stage stages_v1[] = {
+	{
+		.name = "flow pre",
+		.target = NULL,
+	},
+	{
+		.name = "flow post",
+		.source = NULL,
+	},
+	{ 0 },
+};
+
+/* Set of data versions (id, stages); the entry with id 0 ends the list */
+static const struct migrate_version versions[] = {
+	{
+		1,	stages_v1,
+	},
+	{ 0 },
+};
+
+/**
+ * migrate_source() - Migration as source, send header and run source stages
+ * @c:		Execution context
+ * @fd:		File descriptor for state transfer
+ *
+ * Return: 0 on success, positive error code on failure
+ */
+int migrate_source(struct ctx *c, int fd)
+{
+	/* Newest version: last entry before the zero terminator */
+	const struct migrate_version *v = versions + ARRAY_SIZE(versions) - 2;
+	const struct migrate_stage *s;
+	int ret = write_all_buf(fd, &header, sizeof(header));
+
+	if (ret) {
+		err("Can't send migration header: %s, abort", strerror_(ret));
+		return ret;
+	}
+
+	for (s = v->s; s->name; s++) {	/* NULL name ends the list */
+		if (!s->source)
+			continue;
+
+		debug("Source side migration: %s", s->name);
+
+		if ((ret = s->source(c, s, fd))) {
+			err("Source migration stage %s: %s, abort", s->name,
+			    strerror_(ret));
+			return ret;
+		}
+	}
+
+	return 0;
+}
+
+/**
+ * migrate_target_read_header() - Read and check migration header on target
+ * @fd:		Descriptor for state transfer
+ *
+ * Return: version (host order) on success, 0 on failure with errno set
+ */
+static uint32_t migrate_target_read_header(int fd)
+{
+	struct migrate_header h;
+	uint32_t version;
+
+	if (read_all_buf(fd, &h, sizeof(h)))
+		return 0;
+
+	version = ntohl(h.version);
+	debug("Source magic: 0x%016" PRIx64 ", version: %u",
+	      be64toh(h.magic), version);
+	if (be64toh(h.magic) == MIGRATE_MAGIC && version)
+		return version;
+
+	errno = EINVAL;
+	return 0;
+}
+
+/**
+ * migrate_target() - Migration as target, check header and run target stages
+ * @c:		Execution context
+ * @fd:		File descriptor for state transfer
+ *
+ * Return: 0 on success, positive error code on failure (ENOTSUP: bad version)
+ */
+int migrate_target(struct ctx *c, int fd)
+{
+	const struct migrate_version *v;
+	const struct migrate_stage *s;
+	uint32_t id;
+	int ret;
+
+	if (!(id = migrate_target_read_header(fd))) {
+		ret = errno;
+		err("Migration header check failed: %s, abort", strerror_(ret));
+		return ret;
+	}
+
+	for (v = versions; v->id && v->id != id; v++)
+		;	/* Find @id, stop at the zero-id terminator if unknown */
+	if (!v->id) {
+		err("Unsupported version: %u", id);
+		return ENOTSUP;
+	}
+
+	for (s = v->s; s->name; s++) {	/* NULL name ends the list */
+		if (!s->target)
+			continue;
+
+		debug("Target side migration: %s", s->name);
+
+		if ((ret = s->target(c, s, fd))) {
+			err("Target migration stage %s: %s, abort", s->name,
+			    strerror_(ret));
+			return ret;
+		}
+	}
+
+	return 0;
+}
diff --git a/migrate.h b/migrate.h
new file mode 100644
index 0000000..3093b6e
--- /dev/null
+++ b/migrate.h
@@ -0,0 +1,51 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later
+ * Copyright (c) 2025 Red Hat GmbH
+ * Author: Stefano Brivio <sbrivio@redhat.com>
+ */
+
+#ifndef MIGRATE_H
+#define MIGRATE_H
+
+/**
+ * struct migrate_header - Migration header from source
+ * @magic:		0xB1BB1D1B0BB1D1B0, network order
+ * @version:		Source data version, network order, target checks it
+ */
+struct migrate_header {
+	uint64_t magic;
+	uint32_t version;
+} __attribute__((packed));
+
+/**
+ * struct migrate_stage - Callbacks and parameters for one stage of migration
+ * @name:	Stage name (for debugging), NULL in the entry ending an array
+ * @source:	Callback for this stage on the source, NULL to skip it there
+ * @target:	Callback for this stage on the target, NULL to skip it there
+ * @iov:	Optional data section to transfer
+ */
+struct migrate_stage {
+	const char *name;
+	int (*source)(struct ctx *c,
+		      const struct migrate_stage *stage, int fd);
+	int (*target)(struct ctx *c,
+		      const struct migrate_stage *stage, int fd);
+
+	/* FIXME: rollback callbacks? */
+
+	struct iovec iov;
+};
+
+/**
+ * struct migrate_version - Stages for a particular protocol version
+ * @id:		Version number, host order, 0 in the entry ending an array
+ * @s:		Ordered array of stages, ended by an entry with NULL @name
+ */
+struct migrate_version {
+	uint32_t id;
+	const struct migrate_stage *s;
+};
+
+int migrate_source(struct ctx *c, int fd);
+int migrate_target(struct ctx *c, int fd);
+
+#endif /* MIGRATE_H */
diff --git a/passt.c b/passt.c
index b1c8ab6..184d4e5 100644
--- a/passt.c
+++ b/passt.c
@@ -358,7 +358,7 @@ loop:
 			vu_kick_cb(c.vdev, ref, &now);
 			break;
 		case EPOLL_TYPE_VHOST_MIGRATION:
-			vu_migrate(c.vdev, eventmask);
+			vu_migrate(&c, eventmask);
 			break;
 		default:
 			/* Can't happen */
diff --git a/util.h b/util.h
index 23b165c..1aed629 100644
--- a/util.h
+++ b/util.h
@@ -122,12 +122,38 @@
 	 (((x) & 0x0000ff00) <<  8) | (((x) & 0x000000ff) << 24))
 #endif
 
+#ifndef __bswap_constant_32
+#define __bswap_constant_32(x)						\
+	((((x) & 0xff000000) >> 24) | (((x) & 0x00ff0000) >>  8) |	\
+	 (((x) & 0x0000ff00) <<  8) | (((x) & 0x000000ff) << 24))
+#endif
+
+#ifndef __bswap_constant_64
+#define __bswap_constant_64(x) \
+	((((x) & 0xff00000000000000ULL) >> 56) |			\
+	 (((x) & 0x00ff000000000000ULL) >> 40) |			\
+	 (((x) & 0x0000ff0000000000ULL) >> 24) |			\
+	 (((x) & 0x000000ff00000000ULL) >> 8)  |			\
+	 (((x) & 0x00000000ff000000ULL) << 8)  |			\
+	 (((x) & 0x0000000000ff0000ULL) << 24) |			\
+	 (((x) & 0x000000000000ff00ULL) << 40) |			\
+	 (((x) & 0x00000000000000ffULL) << 56))
+#endif
+
 #if __BYTE_ORDER == __BIG_ENDIAN
 #define	htons_constant(x)	(x)
 #define	htonl_constant(x)	(x)
+#define htonll_constant(x)	(x)
+#define	ntohs_constant(x)	(x)
+#define	ntohl_constant(x)	(x)
+#define ntohll_constant(x)	(x)
 #else
 #define	htons_constant(x)	(__bswap_constant_16(x))
 #define	htonl_constant(x)	(__bswap_constant_32(x))
+#define	htonll_constant(x)	(__bswap_constant_64(x))
+#define	ntohs_constant(x)	(__bswap_constant_16(x))
+#define	ntohl_constant(x)	(__bswap_constant_32(x))
+#define	ntohll_constant(x)	(__bswap_constant_64(x))
 #endif
 
 /**
diff --git a/vu_common.c b/vu_common.c
index ab04d31..3d41824 100644
--- a/vu_common.c
+++ b/vu_common.c
@@ -5,6 +5,7 @@
  * common_vu.c - vhost-user common UDP and TCP functions
  */
 
+#include <errno.h>
 #include <unistd.h>
 #include <sys/uio.h>
 #include <sys/eventfd.h>
@@ -17,6 +18,7 @@
 #include "vhost_user.h"
 #include "pcap.h"
 #include "vu_common.h"
+#include "migrate.h"
 
 #define VU_MAX_TX_BUFFER_NB	2
 
@@ -305,48 +307,28 @@ err:
 }
 
 /**
- * vu_migrate() - Send/receive passt insternal state to/from QEMU
- * @vdev:	vhost-user device
+ * vu_migrate() - Send/receive passt internal state to/from QEMU
+ * @c:		Execution context
  * @events:	epoll events
  */
-void vu_migrate(struct vu_dev *vdev, uint32_t events)
+void vu_migrate(struct ctx *c, uint32_t events)
 {
-	int ret;
+	struct vu_dev *vdev = c->vdev;
+	int rc = EIO;
 
-	/* TODO: collect/set passt internal state
-	 * and use vdev->device_state_fd to send/receive it
-	 */
 	debug("vu_migrate fd %d events %x", vdev->device_state_fd, events);
-	if (events & EPOLLOUT) {
-		debug("Saving backend state");
-
-		/* send some stuff */
-		ret = write(vdev->device_state_fd, "PASST", 6);
-		/* value to be returned by VHOST_USER_CHECK_DEVICE_STATE */
-		vdev->device_state_result = ret == -1 ? -1 : 0;
-		/* Closing the file descriptor signals the end of transfer */
-		epoll_del(vdev->context, vdev->device_state_fd);
-		close(vdev->device_state_fd);
-		vdev->device_state_fd = -1;
-	} else if (events & EPOLLIN) {
-		char buf[6];
-
-		debug("Loading backend state");
-		/* read some stuff */
-		ret = read(vdev->device_state_fd, buf, sizeof(buf));
-		/* value to be returned by VHOST_USER_CHECK_DEVICE_STATE */
-		if (ret != sizeof(buf)) {
-			vdev->device_state_result = -1;
-		} else {
-			ret = strncmp(buf, "PASST", sizeof(buf));
-			vdev->device_state_result = ret == 0 ? 0 : -1;
-		}
-	} else if (events & EPOLLHUP) {
-		debug("Closing migration channel");
 
-		/* The end of file signals the end of the transfer. */
-		epoll_del(vdev->context, vdev->device_state_fd);
-		close(vdev->device_state_fd);
-		vdev->device_state_fd = -1;
-	}
+	if (events & EPOLLOUT)
+		rc = migrate_source(c, vdev->device_state_fd);
+	else if (events & EPOLLIN)
+		rc = migrate_target(c, vdev->device_state_fd);
+
+	/* EPOLLHUP without EPOLLIN/EPOLLOUT, or EPOLLERR? Migration failed */
+
+	vdev->device_state_result = rc;
+
+	epoll_ctl(c->epollfd, EPOLL_CTL_DEL, vdev->device_state_fd, NULL);
+	debug("Closing migration channel");
+	close(vdev->device_state_fd);
+	vdev->device_state_fd = -1;
 }
diff --git a/vu_common.h b/vu_common.h
index d56c021..69c4006 100644
--- a/vu_common.h
+++ b/vu_common.h
@@ -57,5 +57,5 @@ void vu_flush(const struct vu_dev *vdev, struct vu_virtq *vq,
 void vu_kick_cb(struct vu_dev *vdev, union epoll_ref ref,
 		const struct timespec *now);
 int vu_send_single(const struct ctx *c, const void *buf, size_t size);
-void vu_migrate(struct vu_dev *vdev, uint32_t events);
+void vu_migrate(struct ctx *c, uint32_t events);
 #endif /* VU_COMMON_H */
-- 
@@ -57,5 +57,5 @@ void vu_flush(const struct vu_dev *vdev, struct vu_virtq *vq,
 void vu_kick_cb(struct vu_dev *vdev, union epoll_ref ref,
 		const struct timespec *now);
 int vu_send_single(const struct ctx *c, const void *buf, size_t size);
-void vu_migrate(struct vu_dev *vdev, uint32_t events);
+void vu_migrate(struct ctx *c, uint32_t events);
 #endif /* VU_COMMON_H */
-- 
2.43.0


  reply	other threads:[~2025-02-05  0:39 UTC|newest]

Thread overview: 14+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-02-05  0:38 [PATCH v5 0/6] Draft, incomplete series introducing state migration Stefano Brivio
2025-02-05  0:38 ` Stefano Brivio [this message]
2025-02-05  1:44   ` [PATCH v5 1/6] Introduce facilities for guest migration on top of vhost-user infrastructure David Gibson
2025-02-05  0:39 ` [PATCH v5 2/6] migrate: Make more handling common rather than vhost-user specific Stefano Brivio
2025-02-05  0:39 ` [PATCH v5 3/6] migrate: Don't handle the migration channel through epoll Stefano Brivio
2025-02-05  0:39 ` [PATCH v5 4/6] Add interfaces and configuration bits for passt-repair Stefano Brivio
2025-02-05  0:39 ` [PATCH v5 5/6] vhost_user: Make source quit after reporting migration state Stefano Brivio
2025-02-05  2:09   ` David Gibson
2025-02-05  5:47     ` Stefano Brivio
2025-02-05  8:58       ` Hanna Czenczek
2025-02-05 10:19         ` Stefano Brivio
2025-02-05 11:39         ` David Gibson
2025-02-05  0:39 ` [PATCH v5 6/6] Implement source and target sides of migration Stefano Brivio
2025-02-05  1:10   ` David Gibson

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20250205003904.2797491-2-sbrivio@redhat.com \
    --to=sbrivio@redhat.com \
    --cc=david@gibson.dropbear.id.au \
    --cc=lvivier@redhat.com \
    --cc=passt-dev@passt.top \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
Code repositories for project(s) associated with this public inbox

	https://passt.top/passt

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for IMAP folder(s).