* [PATCH 1/3] migrate, tcp: More careful marshalling of mss parameter during migration
2025-03-19 5:14 [PATCH 0/3] Fix migration bugs David Gibson
@ 2025-03-19 5:14 ` David Gibson
2025-03-19 5:14 ` [PATCH 2/3] migrate, tcp: Migrate RFC7323 timestamp David Gibson
2025-03-19 5:14 ` [PATCH 3/3] migrate: Bump migration version number David Gibson
2 siblings, 0 replies; 5+ messages in thread
From: David Gibson @ 2025-03-19 5:14 UTC (permalink / raw)
To: Stefano Brivio, passt-dev; +Cc: David Gibson
During migration we extract the limit on segment size using TCP_MAXSEG,
and set it on the other side with TCP_REPAIR_OPTIONS. However, unlike most
32-bit values we transfer we transfer it in native endian, not network
endian. This is not correct; add it to the list of endian fixups we make.
In addition, while MAXSEG will be 32-bits in practice, and is given as such
to TCP_REPAIR_OPTIONS, the TCP_MAXSEG sockopt treats it as an 'int'. It's
not strictly safe to pass a uint32_t to a getsockopt() expecting an int,
although we'll get away with it on most (maybe all) platforms. Correct
this as well.
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
---
tcp.c | 7 ++++++-
1 file changed, 6 insertions(+), 1 deletion(-)
diff --git a/tcp.c b/tcp.c
index a4c840e6..163ddd60 100644
--- a/tcp.c
+++ b/tcp.c
@@ -2847,14 +2847,17 @@ static int tcp_flow_dump_tinfo(const struct tcp_tap_conn *conn,
static int tcp_flow_dump_mss(const struct tcp_tap_conn *conn,
struct tcp_tap_transfer_ext *t)
{
+ int val;
socklen_t sl = sizeof(t->mss);
- if (getsockopt(conn->sock, SOL_TCP, TCP_MAXSEG, &t->mss, &sl)) {
+ if (getsockopt(conn->sock, SOL_TCP, TCP_MAXSEG, &val, &sl)) {
int rc = -errno;
flow_perror(conn, "Getting MSS");
return rc;
}
+ t->mss = (uint32_t)val;
+
return 0;
}
@@ -3301,6 +3304,7 @@ int tcp_flow_migrate_source_ext(int fd, const struct tcp_tap_conn *conn)
t->sndq = htonl(t->sndq);
t->notsent = htonl(t->notsent);
t->rcvq = htonl(t->rcvq);
+ t->mss = htonl(t->mss);
t->snd_wl1 = htonl(t->snd_wl1);
t->snd_wnd = htonl(t->snd_wnd);
@@ -3514,6 +3518,7 @@ int tcp_flow_migrate_target_ext(struct ctx *c, struct tcp_tap_conn *conn, int fd
t.sndq = ntohl(t.sndq);
t.notsent = ntohl(t.notsent);
t.rcvq = ntohl(t.rcvq);
+ t.mss = ntohl(t.mss);
t.snd_wl1 = ntohl(t.snd_wl1);
t.snd_wnd = ntohl(t.snd_wnd);
--
@@ -2847,14 +2847,17 @@ static int tcp_flow_dump_tinfo(const struct tcp_tap_conn *conn,
static int tcp_flow_dump_mss(const struct tcp_tap_conn *conn,
struct tcp_tap_transfer_ext *t)
{
+ int val;
socklen_t sl = sizeof(t->mss);
- if (getsockopt(conn->sock, SOL_TCP, TCP_MAXSEG, &t->mss, &sl)) {
+ if (getsockopt(conn->sock, SOL_TCP, TCP_MAXSEG, &val, &sl)) {
int rc = -errno;
flow_perror(conn, "Getting MSS");
return rc;
}
+ t->mss = (uint32_t)val;
+
return 0;
}
@@ -3301,6 +3304,7 @@ int tcp_flow_migrate_source_ext(int fd, const struct tcp_tap_conn *conn)
t->sndq = htonl(t->sndq);
t->notsent = htonl(t->notsent);
t->rcvq = htonl(t->rcvq);
+ t->mss = htonl(t->mss);
t->snd_wl1 = htonl(t->snd_wl1);
t->snd_wnd = htonl(t->snd_wnd);
@@ -3514,6 +3518,7 @@ int tcp_flow_migrate_target_ext(struct ctx *c, struct tcp_tap_conn *conn, int fd
t.sndq = ntohl(t.sndq);
t.notsent = ntohl(t.notsent);
t.rcvq = ntohl(t.rcvq);
+ t.mss = ntohl(t.mss);
t.snd_wl1 = ntohl(t.snd_wl1);
t.snd_wnd = ntohl(t.snd_wnd);
--
2.48.1
^ permalink raw reply related [flat|nested] 5+ messages in thread
* [PATCH 2/3] migrate, tcp: Migrate RFC7323 timestamp
2025-03-19 5:14 [PATCH 0/3] Fix migration bugs David Gibson
2025-03-19 5:14 ` [PATCH 1/3] migrate, tcp: More careful marshalling of mss parameter during migration David Gibson
@ 2025-03-19 5:14 ` David Gibson
2025-03-19 16:17 ` Stefano Brivio
2025-03-19 5:14 ` [PATCH 3/3] migrate: Bump migration version number David Gibson
2 siblings, 1 reply; 5+ messages in thread
From: David Gibson @ 2025-03-19 5:14 UTC (permalink / raw)
To: Stefano Brivio, passt-dev; +Cc: David Gibson
Currently our migration of the state of TCP sockets omits the RFC7323
timestamp. In some circumstances that can result in data sent from the
target machine not being received, because it is discarded on the peer due
to PAWS checking.
Add code to dump and restore the timestamp across migration.
Link: https://bugs.passt.top/show_bug.cgi?id=115
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
---
tcp.c | 59 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
tcp_conn.h | 2 ++
2 files changed, 61 insertions(+)
diff --git a/tcp.c b/tcp.c
index 163ddd60..56df1636 100644
--- a/tcp.c
+++ b/tcp.c
@@ -2861,6 +2861,57 @@ static int tcp_flow_dump_mss(const struct tcp_tap_conn *conn,
return 0;
}
+
+/**
+ * tcp_flow_dump_timestamp() - Dump RFC7323 timestamp via TCP_TIMESTAMP
+ * @conn: Pointer to the TCP connection structure
+ * @t: Extended migration data (tcpi_options must be populated)
+ *
+ * Return: 0 on success, negative error code on failure
+ */
+static int tcp_flow_dump_timestamp(const struct tcp_tap_conn *conn,
+ struct tcp_tap_transfer_ext *t)
+{
+ int val = 0;
+
+ if (t->tcpi_options & TCPI_OPT_TIMESTAMPS) {
+ socklen_t sl = sizeof(val);
+
+ if (getsockopt(conn->sock, SOL_TCP, TCP_TIMESTAMP, &val, &sl)) {
+ int rc = -errno;
+ flow_perror(conn, "Getting RFC7323 timestamp");
+ return rc;
+ }
+ }
+
+ t->timestamp = (uint32_t)val;
+ return 0;
+}
+
+/**
+ * tcp_flow_repair_timestamp() - Restore RFC7323 timestamp via TCP_TIMESTAMP
+ * @conn: Pointer to the TCP connection structure
+ * @t: Extended migration data
+ *
+ * Return: 0 on success, negative error code on failure
+ */
+static int tcp_flow_repair_timestamp(const struct tcp_tap_conn *conn,
+ const struct tcp_tap_transfer_ext *t)
+{
+ int val = (int)t->timestamp;
+
+ if (t->tcpi_options & TCPI_OPT_TIMESTAMPS) {
+ if (setsockopt(conn->sock, SOL_TCP, TCP_TIMESTAMP,
+ &val, sizeof(val))) {
+ int rc = -errno;
+ flow_perror(conn, "Setting RFC7323 timestamp");
+ return rc;
+ }
+ }
+
+ return 0;
+}
+
/**
* tcp_flow_dump_wnd() - Dump current tcp_repair_window parameters
* @conn: Pointer to the TCP connection structure
@@ -3260,6 +3311,9 @@ int tcp_flow_migrate_source_ext(int fd, const struct tcp_tap_conn *conn)
if ((rc = tcp_flow_dump_mss(conn, t)))
goto fail;
+ if ((rc = tcp_flow_dump_timestamp(conn, t)))
+ goto fail;
+
if ((rc = tcp_flow_dump_wnd(conn, t)))
goto fail;
@@ -3305,6 +3359,7 @@ int tcp_flow_migrate_source_ext(int fd, const struct tcp_tap_conn *conn)
t->notsent = htonl(t->notsent);
t->rcvq = htonl(t->rcvq);
t->mss = htonl(t->mss);
+ t->timestamp = htonl(t->timestamp);
t->snd_wl1 = htonl(t->snd_wl1);
t->snd_wnd = htonl(t->snd_wnd);
@@ -3519,6 +3574,7 @@ int tcp_flow_migrate_target_ext(struct ctx *c, struct tcp_tap_conn *conn, int fd
t.notsent = ntohl(t.notsent);
t.rcvq = ntohl(t.rcvq);
t.mss = ntohl(t.mss);
+ t.timestamp = ntohl(t.timestamp);
t.snd_wl1 = ntohl(t.snd_wl1);
t.snd_wnd = ntohl(t.snd_wnd);
@@ -3561,6 +3617,9 @@ int tcp_flow_migrate_target_ext(struct ctx *c, struct tcp_tap_conn *conn, int fd
/* We weren't able to create the socket, discard flow */
goto fail;
+ if (tcp_flow_repair_timestamp(conn, &t))
+ goto fail;
+
if (tcp_flow_select_queue(conn, TCP_SEND_QUEUE))
goto fail;
diff --git a/tcp_conn.h b/tcp_conn.h
index 9126a36f..c79f558b 100644
--- a/tcp_conn.h
+++ b/tcp_conn.h
@@ -152,6 +152,7 @@ struct tcp_tap_transfer {
* @notsent: Part of pending send queue that wasn't sent out yet
* @rcvq: Length of pending receive queue
* @mss: Socket-side MSS clamp
+ * @timestamp: RFC7323 timestamp
* @snd_wl1: Next sequence used in window probe (next sequence - 1)
* @snd_wnd: Socket-side sending window
* @max_window: Window clamp
@@ -171,6 +172,7 @@ struct tcp_tap_transfer_ext {
uint32_t rcvq;
uint32_t mss;
+ uint32_t timestamp;
/* We can't just use struct tcp_repair_window: we need network order */
uint32_t snd_wl1;
--
@@ -152,6 +152,7 @@ struct tcp_tap_transfer {
* @notsent: Part of pending send queue that wasn't sent out yet
* @rcvq: Length of pending receive queue
* @mss: Socket-side MSS clamp
+ * @timestamp: RFC7323 timestamp
* @snd_wl1: Next sequence used in window probe (next sequence - 1)
* @snd_wnd: Socket-side sending window
* @max_window: Window clamp
@@ -171,6 +172,7 @@ struct tcp_tap_transfer_ext {
uint32_t rcvq;
uint32_t mss;
+ uint32_t timestamp;
/* We can't just use struct tcp_repair_window: we need network order */
uint32_t snd_wl1;
--
2.48.1
^ permalink raw reply related [flat|nested] 5+ messages in thread
* Re: [PATCH 2/3] migrate, tcp: Migrate RFC7323 timestamp
2025-03-19 5:14 ` [PATCH 2/3] migrate, tcp: Migrate RFC7323 timestamp David Gibson
@ 2025-03-19 16:17 ` Stefano Brivio
0 siblings, 0 replies; 5+ messages in thread
From: Stefano Brivio @ 2025-03-19 16:17 UTC (permalink / raw)
To: David Gibson; +Cc: passt-dev
On Wed, 19 Mar 2025 16:14:22 +1100
David Gibson <david@gibson.dropbear.id.au> wrote:
> Currently our migration of the state of TCP sockets omits the RFC7323
> timestamp. In some circumstances that can result in data sent from the
> target machine not being received, because it is discarded on the peer due
> to PAWS checking.
>
> Add code to dump and restore the timestamp across migration.
>
> Link: https://bugs.passt.top/show_bug.cgi?id=115
>
> Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
Sneakily changed all "RFC7323" references to the actual name of the
document, "RFC 7323", and dropped:
> ---
> tcp.c | 59 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
> tcp_conn.h | 2 ++
> 2 files changed, 61 insertions(+)
>
> diff --git a/tcp.c b/tcp.c
> index 163ddd60..56df1636 100644
> --- a/tcp.c
> +++ b/tcp.c
> @@ -2861,6 +2861,57 @@ static int tcp_flow_dump_mss(const struct tcp_tap_conn *conn,
> return 0;
> }
>
> +
> +/**
> + * tcp_flow_dump_timestamp() - Dump RFC7323 timestamp via TCP_TIMESTAMP
> + * @conn: Pointer to the TCP connection structure
> + * @t: Extended migration data (tcpi_options must be populated)
> + *
> + * Return: 0 on success, negative error code on failure
> + */
> +static int tcp_flow_dump_timestamp(const struct tcp_tap_conn *conn,
> + struct tcp_tap_transfer_ext *t)
> +{
> + int val = 0;
> +
> + if (t->tcpi_options & TCPI_OPT_TIMESTAMPS) {
> + socklen_t sl = sizeof(val);
> +
> + if (getsockopt(conn->sock, SOL_TCP, TCP_TIMESTAMP, &val, &sl)) {
> + int rc = -errno;
> + flow_perror(conn, "Getting RFC7323 timestamp");
> + return rc;
> + }
> + }
> +
> + t->timestamp = (uint32_t)val;
> + return 0;
> +}
> +
> +/**
> + * tcp_flow_repair_timestamp() - Restore RFC7323 timestamp via TCP_TIMESTAMP
> + * @conn: Pointer to the TCP connection structure
> + * @t: Extended migration data
> + *
> + * Return: 0 on success, negative error code on failure
> + */
> +static int tcp_flow_repair_timestamp(const struct tcp_tap_conn *conn,
> + const struct tcp_tap_transfer_ext *t)
> +{
> + int val = (int)t->timestamp;
> +
> + if (t->tcpi_options & TCPI_OPT_TIMESTAMPS) {
> + if (setsockopt(conn->sock, SOL_TCP, TCP_TIMESTAMP,
> + &val, sizeof(val))) {
> + int rc = -errno;
> + flow_perror(conn, "Setting RFC7323 timestamp");
> + return rc;
> + }
> + }
> +
> + return 0;
> +}
> +
> /**
> * tcp_flow_dump_wnd() - Dump current tcp_repair_window parameters
> * @conn: Pointer to the TCP connection structure
> @@ -3260,6 +3311,9 @@ int tcp_flow_migrate_source_ext(int fd, const struct tcp_tap_conn *conn)
> if ((rc = tcp_flow_dump_mss(conn, t)))
> goto fail;
>
> + if ((rc = tcp_flow_dump_timestamp(conn, t)))
> + goto fail;
> +
> if ((rc = tcp_flow_dump_wnd(conn, t)))
> goto fail;
>
> @@ -3305,6 +3359,7 @@ int tcp_flow_migrate_source_ext(int fd, const struct tcp_tap_conn *conn)
> t->notsent = htonl(t->notsent);
> t->rcvq = htonl(t->rcvq);
> t->mss = htonl(t->mss);
> + t->timestamp = htonl(t->timestamp);
>
> t->snd_wl1 = htonl(t->snd_wl1);
> t->snd_wnd = htonl(t->snd_wnd);
> @@ -3519,6 +3574,7 @@ int tcp_flow_migrate_target_ext(struct ctx *c, struct tcp_tap_conn *conn, int fd
> t.notsent = ntohl(t.notsent);
> t.rcvq = ntohl(t.rcvq);
> t.mss = ntohl(t.mss);
> + t.timestamp = ntohl(t.timestamp);
>
> t.snd_wl1 = ntohl(t.snd_wl1);
> t.snd_wnd = ntohl(t.snd_wnd);
> @@ -3561,6 +3617,9 @@ int tcp_flow_migrate_target_ext(struct ctx *c, struct tcp_tap_conn *conn, int fd
> /* We weren't able to create the socket, discard flow */
> goto fail;
>
> + if (tcp_flow_repair_timestamp(conn, &t))
> + goto fail;
> +
...this stray tab. Series applied.
> if (tcp_flow_select_queue(conn, TCP_SEND_QUEUE))
> goto fail;
>
> diff --git a/tcp_conn.h b/tcp_conn.h
> index 9126a36f..c79f558b 100644
> --- a/tcp_conn.h
> +++ b/tcp_conn.h
> @@ -152,6 +152,7 @@ struct tcp_tap_transfer {
> * @notsent: Part of pending send queue that wasn't sent out yet
> * @rcvq: Length of pending receive queue
> * @mss: Socket-side MSS clamp
> + * @timestamp: RFC7323 timestamp
> * @snd_wl1: Next sequence used in window probe (next sequence - 1)
> * @snd_wnd: Socket-side sending window
> * @max_window: Window clamp
> @@ -171,6 +172,7 @@ struct tcp_tap_transfer_ext {
> uint32_t rcvq;
>
> uint32_t mss;
> + uint32_t timestamp;
>
> /* We can't just use struct tcp_repair_window: we need network order */
> uint32_t snd_wl1;
--
Stefano
^ permalink raw reply [flat|nested] 5+ messages in thread
* [PATCH 3/3] migrate: Bump migration version number
2025-03-19 5:14 [PATCH 0/3] Fix migration bugs David Gibson
2025-03-19 5:14 ` [PATCH 1/3] migrate, tcp: More careful marshalling of mss parameter during migration David Gibson
2025-03-19 5:14 ` [PATCH 2/3] migrate, tcp: Migrate RFC7323 timestamp David Gibson
@ 2025-03-19 5:14 ` David Gibson
2 siblings, 0 replies; 5+ messages in thread
From: David Gibson @ 2025-03-19 5:14 UTC (permalink / raw)
To: Stefano Brivio, passt-dev; +Cc: David Gibson
v1 of the migration stream format, had some flaws: it didn't properly
handle endianness of the MSS field, and it didn't transfer the RFC7323
timestamp. We've now fixed those bugs, but it requires incompatible
changes to the stream format.
Because of the timestamps in particular, v1 is not really usable, so there
is little point maintaining compatible support for it. However, v1 is in
released packages, both upstream and downstream (RHEL at least). Just
updating the stream format without bumping the version would lead to very
cryptic errors if anyone did attempt to migrate between an old and new
passt.
So, bump the migration version to v2, so we'll get a clear error message if
anyone attempts this. We don't attempt to maintain backwards compatibility
with v1, however: we'll simply fail if given a v1 stream.
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
---
migrate.c | 10 +++++++---
1 file changed, 7 insertions(+), 3 deletions(-)
diff --git a/migrate.c b/migrate.c
index 0fca77b7..48d63a07 100644
--- a/migrate.c
+++ b/migrate.c
@@ -96,8 +96,8 @@ static int seen_addrs_target_v1(struct ctx *c,
return 0;
}
-/* Stages for version 1 */
-static const struct migrate_stage stages_v1[] = {
+/* Stages for version 2 */
+static const struct migrate_stage stages_v2[] = {
{
.name = "observed addresses",
.source = seen_addrs_source_v1,
@@ -118,7 +118,11 @@ static const struct migrate_stage stages_v1[] = {
/* Supported encoding versions, from latest (most preferred) to oldest */
static const struct migrate_version versions[] = {
- { 1, stages_v1, },
+ { 2, stages_v2, },
+ /* v1 was released, but not widely used. It had bad endianness for the
+ * MSS and omitted timestamps, which meant it usually wouldn't work.
+ * Therefore we don't attempt to support compatibility with it.
+ */
{ 0 },
};
--
@@ -96,8 +96,8 @@ static int seen_addrs_target_v1(struct ctx *c,
return 0;
}
-/* Stages for version 1 */
-static const struct migrate_stage stages_v1[] = {
+/* Stages for version 2 */
+static const struct migrate_stage stages_v2[] = {
{
.name = "observed addresses",
.source = seen_addrs_source_v1,
@@ -118,7 +118,11 @@ static const struct migrate_stage stages_v1[] = {
/* Supported encoding versions, from latest (most preferred) to oldest */
static const struct migrate_version versions[] = {
- { 1, stages_v1, },
+ { 2, stages_v2, },
+ /* v1 was released, but not widely used. It had bad endianness for the
+ * MSS and omitted timestamps, which meant it usually wouldn't work.
+ * Therefore we don't attempt to support compatibility with it.
+ */
{ 0 },
};
--
2.48.1
^ permalink raw reply related [flat|nested] 5+ messages in thread