// SPDX-License-Identifier: GPL-2.0-or-later /* PASST - Plug A Simple Socket Transport * for qemu/UNIX domain socket mode * * PASTA - Pack A Subtle Tap Abstraction * for network namespace/tap device mode * * migrate.c - Migration sections, layout, and routines * * Copyright (c) 2025 Red Hat GmbH * Author: Stefano Brivio */ #include #include #include "util.h" #include "ip.h" #include "passt.h" #include "conf.h" #include "fwd.h" #include "inany.h" #include "flow.h" #include "flow_table.h" #include "migrate.h" #include "repair.h" /* Magic identifier for migration data */ #define MIGRATE_MAGIC 0xB1BB1D1B0BB1D1B0 /** * struct migrate_seen_addrs_v2 - Migratable guest addresses for v2 state stream * @addr6: Observed guest IPv6 address * @addr6_ll: Observed guest IPv6 link-local address * @addr4: Observed guest IPv4 address * @mac: Observed guest MAC address */ struct migrate_seen_addrs_v2 { struct in6_addr addr6; struct in6_addr addr6_ll; struct in_addr addr4; unsigned char mac[ETH_ALEN]; } __attribute__((packed)); /** * Wire format flags for address migration (v3) * These are stable values - do not change existing assignments */ #define MIGRATE_ADDR_USER BIT(0) #define MIGRATE_ADDR_HOST BIT(1) #define MIGRATE_ADDR_LINKLOCAL BIT(2) #define MIGRATE_ADDR_OBSERVED BIT(3) #define MIGRATE_ADDR_DHCP BIT(4) #define MIGRATE_ADDR_DHCPV6 BIT(5) /** * struct migrate_addr_v3 - Wire format for a single address entry * @addr: IPv6 or IPv4-mapped address (16 bytes) * @prefix_len: Prefix length * @flags: MIGRATE_ADDR_* flags (wire format) */ struct migrate_addr_v3 { struct in6_addr addr; uint8_t prefix_len; uint8_t flags; } __attribute__((__packed__)); /** * flags_to_wire() - Convert internal flags to stable wire format * @flags: Internal CONF_ADDR_* flags * * Return: Wire format MIGRATE_ADDR_* flags */ static uint8_t flags_to_wire(uint8_t flags) { uint8_t wire = 0; if (flags & CONF_ADDR_USER) wire |= MIGRATE_ADDR_USER; if (flags & CONF_ADDR_HOST) wire |= MIGRATE_ADDR_HOST; if (flags & CONF_ADDR_LINKLOCAL) wire |= MIGRATE_ADDR_LINKLOCAL; if (flags & CONF_ADDR_OBSERVED) wire |= MIGRATE_ADDR_OBSERVED; if (flags & CONF_ADDR_DHCP) wire |= MIGRATE_ADDR_DHCP; if (flags & CONF_ADDR_DHCPV6) wire |= MIGRATE_ADDR_DHCPV6; return wire; } /** * flags_from_wire() - Convert wire format flags to internal format * @wire: Wire format MIGRATE_ADDR_* flags * * Return: Internal CONF_ADDR_* flags */ static uint8_t flags_from_wire(uint8_t wire) { uint8_t flags = 0; if (wire & MIGRATE_ADDR_USER) flags |= CONF_ADDR_USER; if (wire & MIGRATE_ADDR_HOST) flags |= CONF_ADDR_HOST; if (wire & MIGRATE_ADDR_LINKLOCAL) flags |= CONF_ADDR_LINKLOCAL; if (wire & MIGRATE_ADDR_OBSERVED) flags |= CONF_ADDR_OBSERVED; if (wire & MIGRATE_ADDR_DHCP) flags |= CONF_ADDR_DHCP; if (wire & MIGRATE_ADDR_DHCPV6) flags |= CONF_ADDR_DHCPV6; return flags; } /** * seen_addrs_source_v2() - Copy and send guest observed addresses from source * @c: Execution context * @stage: Migration stage, unused * @fd: File descriptor for state transfer * * Return: 0 on success, positive error code on failure */ /* cppcheck-suppress [constParameterCallback, unmatchedSuppression] */ static int seen_addrs_source_v2(struct ctx *c, const struct migrate_stage *stage, int fd) { struct migrate_seen_addrs_v2 addrs = { 0 }; const struct guest_addr *a; (void)stage; /* IPv4 observed address, with fallback to any other non-LL address */ a = fwd_select_addr(c, AF_INET, CONF_ADDR_OBSERVED, CONF_ADDR_USER | CONF_ADDR_HOST, CONF_ADDR_LINKLOCAL); if (a) addrs.addr4 = *inany_v4(&a->addr); /* IPv6 observed address, with fallback to any other non-LL address */ a = fwd_select_addr(c, AF_INET6, CONF_ADDR_OBSERVED, CONF_ADDR_USER | CONF_ADDR_HOST, CONF_ADDR_LINKLOCAL); if (a) addrs.addr6 = a->addr.a6; /* IPv6 link-local address */ a = fwd_get_addr(c, AF_INET6, CONF_ADDR_LINKLOCAL, 0); if (a) addrs.addr6_ll = a->addr.a6; memcpy(addrs.mac, c->guest_mac, sizeof(addrs.mac)); if (write_all_buf(fd, &addrs, sizeof(addrs))) return errno; return 0; } /** * seen_addrs_target_v2() - Receive and use guest observed addresses on target * @c: Execution context * @stage: Migration stage, unused * @fd: File descriptor for state transfer * * Return: 0 on success, positive error code on failure */ static int seen_addrs_target_v2(struct ctx *c, const struct migrate_stage *stage, int fd) { struct migrate_seen_addrs_v2 addrs; struct in6_addr addr6, addr6_ll; (void)stage; if (read_all_buf(fd, &addrs, sizeof(addrs))) return errno; if (addrs.addr4.s_addr) fwd_set_addr(c, &inany_from_v4(addrs.addr4), CONF_ADDR_OBSERVED, 0); addr6 = addrs.addr6; if (!IN6_IS_ADDR_UNSPECIFIED(&addr6)) fwd_set_addr(c, &inany_from_v6(addr6), CONF_ADDR_OBSERVED, 0); addr6_ll = addrs.addr6_ll; if (!IN6_IS_ADDR_UNSPECIFIED(&addr6_ll)) fwd_set_addr(c, &inany_from_v6(addr6_ll), CONF_ADDR_OBSERVED | CONF_ADDR_LINKLOCAL, 0); memcpy(c->guest_mac, addrs.mac, sizeof(c->guest_mac)); return 0; } /** * addrs_source_v3() - Send all addresses with flags from source * @c: Execution context * @stage: Migration stage, unused * @fd: File descriptor for state transfer * * Send all address entries using a stable wire format. Each field is * serialized explicitly to avoid coupling the wire format to internal * structure layout or flag bit assignments. * * Return: 0 on success, positive error code on failure */ /* cppcheck-suppress [constParameterCallback, unmatchedSuppression] */ static int addrs_source_v3(struct ctx *c, const struct migrate_stage *stage, int fd) { uint8_t addr_count = c->addr_count; const struct guest_addr *a; (void)stage; /* Send count first */ if (write_all_buf(fd, &addr_count, sizeof(addr_count))) return errno; /* Send each address in stable wire format */ for_each_addr(a, c, 0) { struct migrate_addr_v3 wire = { .addr = a->addr.a6, .prefix_len = a->prefix_len, .flags = flags_to_wire(a->flags), }; if (write_all_buf(fd, &wire, sizeof(wire))) return errno; } /* Send MAC */ if (write_all_buf(fd, c->guest_mac, ETH_ALEN)) return errno; return 0; } /** * addrs_target_v3() - Receive addresses on target * @c: Execution context * @stage: Migration stage, unused * @fd: File descriptor for state transfer * * Receive address entries from the stable wire format and merge only * observed addresses into local array. Source sends all addresses for * forward compatibility, but target only applies those marked as observed. * * Return: 0 on success, positive error code on failure */ static int addrs_target_v3(struct ctx *c, const struct migrate_stage *stage, int fd) { uint8_t addr_count, i; (void)stage; if (read_all_buf(fd, &addr_count, sizeof(addr_count))) return errno; if (addr_count > MAX_GUEST_ADDRS) addr_count = MAX_GUEST_ADDRS; /* Read each address from stable wire format */ for (i = 0; i < addr_count; i++) { struct migrate_addr_v3 wire; struct guest_addr addr; if (read_all_buf(fd, &wire, sizeof(wire))) return errno; addr.addr.a6 = wire.addr; addr.prefix_len = wire.prefix_len; addr.flags = flags_from_wire(wire.flags); if (addr.flags & CONF_ADDR_OBSERVED) fwd_set_addr(c, &addr.addr, addr.flags, addr.prefix_len); } if (read_all_buf(fd, c->guest_mac, ETH_ALEN)) return errno; return 0; } /* Stages for version 2 */ static const struct migrate_stage stages_v2[] = { { .name = "observed addresses", .source = seen_addrs_source_v2, .target = seen_addrs_target_v2, }, { .name = "prepare flows", .source = flow_migrate_source_pre, .target = NULL, }, { .name = "transfer flows", .source = flow_migrate_source, .target = flow_migrate_target, }, { 0 }, }; /* Stages for version 3 (multiple observed IPv4 addresses) */ static const struct migrate_stage stages_v3[] = { { .name = "addresses", .source = addrs_source_v3, .target = addrs_target_v3, }, { .name = "prepare flows", .source = flow_migrate_source_pre, .target = NULL, }, { .name = "transfer flows", .source = flow_migrate_source, .target = flow_migrate_target, }, { 0 }, }; /* Supported encoding versions, from latest (most preferred) to oldest */ static const struct migrate_version versions[] = { { 3, stages_v3, }, { 2, stages_v2, }, /* v1 was released, but not widely used. It had bad endianness for the * MSS and omitted timestamps, which meant it usually wouldn't work. * Therefore we don't attempt to support compatibility with it. */ }; /* Current encoding version */ #define CURRENT_VERSION (&versions[0]) /** * migrate_source() - Migration as source, send state to hypervisor * @c: Execution context * @fd: File descriptor for state transfer * * Return: 0 on success, positive error code on failure */ static int migrate_source(struct ctx *c, int fd) { const struct migrate_version *v = CURRENT_VERSION; const struct migrate_header header = { .magic = htonll_constant(MIGRATE_MAGIC), .version = htonl(v->id), .compat_version = htonl(v->id), }; const struct migrate_stage *s; int ret; if (write_all_buf(fd, &header, sizeof(header))) { ret = errno; err("Can't send migration header: %s, abort", strerror_(ret)); return ret; } for (s = v->s; s->name; s++) { if (!s->source) continue; debug("Source side migration stage: %s", s->name); if ((ret = s->source(c, s, fd))) { err("Source migration stage: %s: %s, abort", s->name, strerror_(ret)); return ret; } } return 0; } /** * migrate_target_read_header() - Read header in target * @fd: Descriptor for state transfer * * Return: version structure on success, NULL on failure with errno set */ static const struct migrate_version *migrate_target_read_header(int fd) { struct migrate_header h; uint32_t id, compat_id; unsigned i; if (read_all_buf(fd, &h, sizeof(h))) return NULL; id = ntohl(h.version); compat_id = ntohl(h.compat_version); debug("Source magic: 0x%016" PRIx64 ", version: %u, compat: %u", ntohll(h.magic), id, compat_id); if (ntohll(h.magic) != MIGRATE_MAGIC || !id || !compat_id) { err("Invalid incoming device state"); errno = EINVAL; return NULL; } for (i = 0; i < ARRAY_SIZE(versions); i++) if (versions[i].id <= id && versions[i].id >= compat_id) return &versions[i]; errno = ENOTSUP; err("Unsupported device state version: %u", id); return NULL; } /** * migrate_target() - Migration as target, receive state from hypervisor * @c: Execution context * @fd: File descriptor for state transfer * * Return: 0 on success, positive error code on failure */ static int migrate_target(struct ctx *c, int fd) { const struct migrate_version *v; const struct migrate_stage *s; int ret; if (!(v = migrate_target_read_header(fd))) return errno; for (s = v->s; s->name; s++) { if (!s->target) continue; debug("Target side migration stage: %s", s->name); if ((ret = s->target(c, s, fd))) { err("Target migration stage: %s: %s, abort", s->name, strerror_(ret)); return ret; } } return 0; } /** * migrate_init() - Set up things necessary for migration * @c: Execution context */ void migrate_init(struct ctx *c) { c->device_state_result = -1; } /** * migrate_close() - Close migration channel and connection to passt-repair * @c: Execution context */ void migrate_close(struct ctx *c) { if (c->device_state_fd != -1) { debug("Closing migration channel, fd: %d", c->device_state_fd); close(c->device_state_fd); c->device_state_fd = -1; c->device_state_result = -1; } repair_close(c); } /** * migrate_request() - Request a migration of device state * @c: Execution context * @fd: fd to transfer state * @target: Are we the target of the migration? */ void migrate_request(struct ctx *c, int fd, bool target) { debug("Migration requested, fd: %d (was %d)", fd, c->device_state_fd); if (c->device_state_fd != -1) migrate_close(c); c->device_state_fd = fd; c->migrate_target = target; } /** * migrate_handler() - Send/receive passt internal state to/from hypervisor * @c: Execution context */ void migrate_handler(struct ctx *c) { int rc; if (c->device_state_fd < 0) return; debug("Handling migration request from fd: %d, target: %d", c->device_state_fd, c->migrate_target); if (c->migrate_target) rc = migrate_target(c, c->device_state_fd); else rc = migrate_source(c, c->device_state_fd); migrate_close(c); c->device_state_result = rc; }