* [PATCH] tcp: Set PSH flag for last incoming packets in a batch
@ 2025-01-20 23:49 Stefano Brivio
2025-01-20 23:56 ` Stefano Brivio
2025-01-22 1:06 ` David Gibson
0 siblings, 2 replies; 3+ messages in thread
From: Stefano Brivio @ 2025-01-20 23:49 UTC (permalink / raw)
To: passt-dev
So far we omitted setting PSH flags for inbound traffic altogether: as
we ignore the nature of the data we're sending, we can't conclude that
some data is more or less urgent. This works fine with Linux guests,
as the Linux kernel doesn't do much with it, on input: it will
generally deliver data to the application layer without delay.
However, with Windows, things change: if we don't set the PSH flag on
interactive inbound traffic, we can expect long delays before the data
is delivered to the application.
This is very visible with RDP, where packets we send on behalf of the
RDP client are delivered with delays exceeding one second:
$ tshark -r rdp.pcap -td -Y 'frame.number in { 33170 .. 33173 }' --disable-protocol tls
33170 0.030296 93.235.154.248 → 88.198.0.164 54 TCP 49012 → 3389 [ACK] Seq=13820 Ack=285229 Win=387968 Len=0
33171 0.985412 88.198.0.164 → 93.235.154.248 105 TCP 3389 → 49012 [PSH, ACK] Seq=285229 Ack=13820 Win=63198 Len=51
33172 0.030373 93.235.154.248 → 88.198.0.164 54 TCP 49012 → 3389 [ACK] Seq=13820 Ack=285280 Win=387968 Len=0
33173 1.383776 88.198.0.164 → 93.235.154.248 424 TCP 3389 → 49012 [PSH, ACK] Seq=285280 Ack=13820 Win=63198 Len=370
in this example (packet capture taken by passt), frame #33172 is a
mouse event sent by the RDP client, and frame #33173 is the first
event (display reacting to click) sent back by the server. This
appears as a 1.4 s delay before we get frame #33173.
If we set PSH, instead:
$ tshark -r rdp_psh.pcap -td -Y 'frame.number in { 314 .. 317 }' --disable-protocol tls
314 0.002503 93.235.154.248 → 88.198.0.164 170 TCP 51066 → 3389 [PSH, ACK] Seq=7779 Ack=74047 Win=31872 Len=116
315 0.000557 88.198.0.164 → 93.235.154.248 54 TCP 3389 → 51066 [ACK] Seq=79162 Ack=7895 Win=62872 Len=0
316 0.012752 93.235.154.248 → 88.198.0.164 170 TCP 51066 → 3389 [PSH, ACK] Seq=7895 Ack=79162 Win=31872 Len=116
317 0.011927 88.198.0.164 → 93.235.154.248 107 TCP 3389 → 51066 [PSH, ACK] Seq=79162 Ack=8011 Win=62756 Len=53
here, in frame #116, our mouse event is delivered without a delay and
receives a response in approximately 12 ms.
Set PSH on the last segment for any batch we dequeue from the socket,
that is, set it whenever we know that we might not be sending data to
the same port for a while.
Reported-by: NN708
Link: https://bugs.passt.top/show_bug.cgi?id=107
Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
---
tcp_buf.c | 11 ++++++++---
tcp_vu.c | 7 +++++--
2 files changed, 13 insertions(+), 5 deletions(-)
diff --git a/tcp_buf.c b/tcp_buf.c
index cbefa42..72d99c5 100644
--- a/tcp_buf.c
+++ b/tcp_buf.c
@@ -239,9 +239,10 @@ int tcp_buf_send_flag(const struct ctx *c, struct tcp_tap_conn *conn, int flags)
* @dlen: TCP payload length
* @no_csum: Don't compute IPv4 checksum, use the one from previous buffer
* @seq: Sequence number to be sent
+ * @push: Set PSH flag, last segment in a batch
*/
static void tcp_data_to_tap(const struct ctx *c, struct tcp_tap_conn *conn,
- ssize_t dlen, int no_csum, uint32_t seq)
+ ssize_t dlen, int no_csum, uint32_t seq, bool push)
{
struct tcp_payload_t *payload;
const uint16_t *check = NULL;
@@ -268,6 +269,7 @@ static void tcp_data_to_tap(const struct ctx *c, struct tcp_tap_conn *conn,
payload->th.th_x2 = 0;
payload->th.th_flags = 0;
payload->th.ack = 1;
+ payload->th.psh = push;
iov[TCP_IOV_PAYLOAD].iov_len = dlen + sizeof(struct tcphdr);
tcp_l2_buf_fill_headers(conn, iov, check, seq, false);
if (++tcp_payload_used > TCP_FRAMES_MEM - 1)
@@ -402,11 +404,14 @@ int tcp_buf_data_from_sock(const struct ctx *c, struct tcp_tap_conn *conn)
seq = conn->seq_to_tap;
for (i = 0; i < send_bufs; i++) {
int no_csum = i && i != send_bufs - 1 && tcp_payload_used;
+ bool push = false;
- if (i == send_bufs - 1)
+ if (i == send_bufs - 1) {
dlen = last_len;
+ push = true;
+ }
- tcp_data_to_tap(c, conn, dlen, no_csum, seq);
+ tcp_data_to_tap(c, conn, dlen, no_csum, seq, push);
seq += dlen;
}
diff --git a/tcp_vu.c b/tcp_vu.c
index a216bb1..fad7065 100644
--- a/tcp_vu.c
+++ b/tcp_vu.c
@@ -289,10 +289,11 @@ static ssize_t tcp_vu_sock_recv(const struct ctx *c,
* @iov_cnt: Number of entries in @iov
* @check: Checksum, if already known
* @no_tcp_csum: Do not set TCP checksum
+ * @push: Set PSH flag, last segment in a batch
*/
static void tcp_vu_prepare(const struct ctx *c, struct tcp_tap_conn *conn,
struct iovec *iov, size_t iov_cnt,
- const uint16_t **check, bool no_tcp_csum)
+ const uint16_t **check, bool no_tcp_csum, bool push)
{
const struct flowside *toside = TAPFLOW(conn);
bool v6 = !(inany_v4(&toside->eaddr) && inany_v4(&toside->oaddr));
@@ -334,6 +335,7 @@ static void tcp_vu_prepare(const struct ctx *c, struct tcp_tap_conn *conn,
memset(th, 0, sizeof(*th));
th->doff = sizeof(*th) / 4;
th->ack = 1;
+ th->psh = push;
tcp_fill_headers(conn, NULL, ip4h, ip6h, th, &payload,
*check, conn->seq_to_tap, no_tcp_csum);
@@ -443,6 +445,7 @@ int tcp_vu_data_from_sock(const struct ctx *c, struct tcp_tap_conn *conn)
struct iovec *iov = &elem[head[i]].in_sg[0];
int buf_cnt = head[i + 1] - head[i];
ssize_t dlen = iov_size(iov, buf_cnt) - hdrlen;
+ bool push = i == head_cnt - 1;
vu_set_vnethdr(vdev, iov->iov_base, buf_cnt);
@@ -451,7 +454,7 @@ int tcp_vu_data_from_sock(const struct ctx *c, struct tcp_tap_conn *conn)
check = NULL;
previous_dlen = dlen;
- tcp_vu_prepare(c, conn, iov, buf_cnt, &check, !*c->pcap);
+ tcp_vu_prepare(c, conn, iov, buf_cnt, &check, !*c->pcap, push);
if (*c->pcap) {
pcap_iov(iov, buf_cnt,
--
@@ -289,10 +289,11 @@ static ssize_t tcp_vu_sock_recv(const struct ctx *c,
* @iov_cnt: Number of entries in @iov
* @check: Checksum, if already known
* @no_tcp_csum: Do not set TCP checksum
+ * @push: Set PSH flag, last segment in a batch
*/
static void tcp_vu_prepare(const struct ctx *c, struct tcp_tap_conn *conn,
struct iovec *iov, size_t iov_cnt,
- const uint16_t **check, bool no_tcp_csum)
+ const uint16_t **check, bool no_tcp_csum, bool push)
{
const struct flowside *toside = TAPFLOW(conn);
bool v6 = !(inany_v4(&toside->eaddr) && inany_v4(&toside->oaddr));
@@ -334,6 +335,7 @@ static void tcp_vu_prepare(const struct ctx *c, struct tcp_tap_conn *conn,
memset(th, 0, sizeof(*th));
th->doff = sizeof(*th) / 4;
th->ack = 1;
+ th->psh = push;
tcp_fill_headers(conn, NULL, ip4h, ip6h, th, &payload,
*check, conn->seq_to_tap, no_tcp_csum);
@@ -443,6 +445,7 @@ int tcp_vu_data_from_sock(const struct ctx *c, struct tcp_tap_conn *conn)
struct iovec *iov = &elem[head[i]].in_sg[0];
int buf_cnt = head[i + 1] - head[i];
ssize_t dlen = iov_size(iov, buf_cnt) - hdrlen;
+ bool push = i == head_cnt - 1;
vu_set_vnethdr(vdev, iov->iov_base, buf_cnt);
@@ -451,7 +454,7 @@ int tcp_vu_data_from_sock(const struct ctx *c, struct tcp_tap_conn *conn)
check = NULL;
previous_dlen = dlen;
- tcp_vu_prepare(c, conn, iov, buf_cnt, &check, !*c->pcap);
+ tcp_vu_prepare(c, conn, iov, buf_cnt, &check, !*c->pcap, push);
if (*c->pcap) {
pcap_iov(iov, buf_cnt,
--
2.43.0
^ permalink raw reply related [flat|nested] 3+ messages in thread
* Re: [PATCH] tcp: Set PSH flag for last incoming packets in a batch
2025-01-20 23:49 [PATCH] tcp: Set PSH flag for last incoming packets in a batch Stefano Brivio
@ 2025-01-20 23:56 ` Stefano Brivio
2025-01-22 1:06 ` David Gibson
1 sibling, 0 replies; 3+ messages in thread
From: Stefano Brivio @ 2025-01-20 23:56 UTC (permalink / raw)
To: passt-dev
On Tue, 21 Jan 2025 00:49:58 +0100
Stefano Brivio <sbrivio@redhat.com> wrote:
> So far we omitted setting PSH flags for inbound traffic altogether: as
> we ignore the nature of the data we're sending, we can't conclude that
> some data is more or less urgent. This works fine with Linux guests,
> as the Linux kernel doesn't do much with it, on input: it will
> generally deliver data to the application layer without delay.
>
> However, with Windows, things change: if we don't set the PSH flag on
> interactive inbound traffic, we can expect long delays before the data
> is delivered to the application.
>
> This is very visible with RDP, where packets we send on behalf of the
> RDP client are delivered with delays exceeding one second:
>
> $ tshark -r rdp.pcap -td -Y 'frame.number in { 33170 .. 33173 }' --disable-protocol tls
> 33170 0.030296 93.235.154.248 → 88.198.0.164 54 TCP 49012 → 3389 [ACK] Seq=13820 Ack=285229 Win=387968 Len=0
> 33171 0.985412 88.198.0.164 → 93.235.154.248 105 TCP 3389 → 49012 [PSH, ACK] Seq=285229 Ack=13820 Win=63198 Len=51
> 33172 0.030373 93.235.154.248 → 88.198.0.164 54 TCP 49012 → 3389 [ACK] Seq=13820 Ack=285280 Win=387968 Len=0
> 33173 1.383776 88.198.0.164 → 93.235.154.248 424 TCP 3389 → 49012 [PSH, ACK] Seq=285280 Ack=13820 Win=63198 Len=370
>
> in this example (packet capture taken by passt), frame #33172 is a
> mouse event sent by the RDP client, and frame #33173 is the first
> event (display reacting to click) sent back by the server. This
> appears as a 1.4 s delay before we get frame #33173.
>
> If we set PSH, instead:
>
> $ tshark -r rdp_psh.pcap -td -Y 'frame.number in { 314 .. 317 }' --disable-protocol tls
> 314 0.002503 93.235.154.248 → 88.198.0.164 170 TCP 51066 → 3389 [PSH, ACK] Seq=7779 Ack=74047 Win=31872 Len=116
> 315 0.000557 88.198.0.164 → 93.235.154.248 54 TCP 3389 → 51066 [ACK] Seq=79162 Ack=7895 Win=62872 Len=0
> 316 0.012752 93.235.154.248 → 88.198.0.164 170 TCP 51066 → 3389 [PSH, ACK] Seq=7895 Ack=79162 Win=31872 Len=116
> 317 0.011927 88.198.0.164 → 93.235.154.248 107 TCP 3389 → 51066 [PSH, ACK] Seq=79162 Ack=8011 Win=62756 Len=53
>
> here, in frame #116, our mouse event is delivered without a delay and
^^^ 316
--
Stefano
^ permalink raw reply [flat|nested] 3+ messages in thread
* Re: [PATCH] tcp: Set PSH flag for last incoming packets in a batch
2025-01-20 23:49 [PATCH] tcp: Set PSH flag for last incoming packets in a batch Stefano Brivio
2025-01-20 23:56 ` Stefano Brivio
@ 2025-01-22 1:06 ` David Gibson
1 sibling, 0 replies; 3+ messages in thread
From: David Gibson @ 2025-01-22 1:06 UTC (permalink / raw)
To: Stefano Brivio; +Cc: passt-dev
[-- Attachment #1: Type: text/plain, Size: 6406 bytes --]
On Tue, Jan 21, 2025 at 12:49:58AM +0100, Stefano Brivio wrote:
> So far we omitted setting PSH flags for inbound traffic altogether: as
> we ignore the nature of the data we're sending, we can't conclude that
> some data is more or less urgent. This works fine with Linux guests,
> as the Linux kernel doesn't do much with it, on input: it will
> generally deliver data to the application layer without delay.
>
> However, with Windows, things change: if we don't set the PSH flag on
> interactive inbound traffic, we can expect long delays before the data
> is delivered to the application.
>
> This is very visible with RDP, where packets we send on behalf of the
> RDP client are delivered with delays exceeding one second:
>
> $ tshark -r rdp.pcap -td -Y 'frame.number in { 33170 .. 33173 }' --disable-protocol tls
> 33170 0.030296 93.235.154.248 → 88.198.0.164 54 TCP 49012 → 3389 [ACK] Seq=13820 Ack=285229 Win=387968 Len=0
> 33171 0.985412 88.198.0.164 → 93.235.154.248 105 TCP 3389 → 49012 [PSH, ACK] Seq=285229 Ack=13820 Win=63198 Len=51
> 33172 0.030373 93.235.154.248 → 88.198.0.164 54 TCP 49012 → 3389 [ACK] Seq=13820 Ack=285280 Win=387968 Len=0
> 33173 1.383776 88.198.0.164 → 93.235.154.248 424 TCP 3389 → 49012 [PSH, ACK] Seq=285280 Ack=13820 Win=63198 Len=370
>
> in this example (packet capture taken by passt), frame #33172 is a
> mouse event sent by the RDP client, and frame #33173 is the first
> event (display reacting to click) sent back by the server. This
> appears as a 1.4 s delay before we get frame #33173.
>
> If we set PSH, instead:
>
> $ tshark -r rdp_psh.pcap -td -Y 'frame.number in { 314 .. 317 }' --disable-protocol tls
> 314 0.002503 93.235.154.248 → 88.198.0.164 170 TCP 51066 → 3389 [PSH, ACK] Seq=7779 Ack=74047 Win=31872 Len=116
> 315 0.000557 88.198.0.164 → 93.235.154.248 54 TCP 3389 → 51066 [ACK] Seq=79162 Ack=7895 Win=62872 Len=0
> 316 0.012752 93.235.154.248 → 88.198.0.164 170 TCP 51066 → 3389 [PSH, ACK] Seq=7895 Ack=79162 Win=31872 Len=116
> 317 0.011927 88.198.0.164 → 93.235.154.248 107 TCP 3389 → 51066 [PSH, ACK] Seq=79162 Ack=8011 Win=62756 Len=53
>
> here, in frame #116, our mouse event is delivered without a delay and
> receives a response in approximately 12 ms.
>
> Set PSH on the last segment for any batch we dequeue from the socket,
> that is, set it whenever we know that we might not be sending data to
> the same port for a while.
Huh, fascinating.
> Reported-by: NN708
> Link: https://bugs.passt.top/show_bug.cgi?id=107
> Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
> ---
> tcp_buf.c | 11 ++++++++---
> tcp_vu.c | 7 +++++--
> 2 files changed, 13 insertions(+), 5 deletions(-)
>
> diff --git a/tcp_buf.c b/tcp_buf.c
> index cbefa42..72d99c5 100644
> --- a/tcp_buf.c
> +++ b/tcp_buf.c
> @@ -239,9 +239,10 @@ int tcp_buf_send_flag(const struct ctx *c, struct tcp_tap_conn *conn, int flags)
> * @dlen: TCP payload length
> * @no_csum: Don't compute IPv4 checksum, use the one from previous buffer
> * @seq: Sequence number to be sent
> + * @push: Set PSH flag, last segment in a batch
> */
> static void tcp_data_to_tap(const struct ctx *c, struct tcp_tap_conn *conn,
> - ssize_t dlen, int no_csum, uint32_t seq)
> + ssize_t dlen, int no_csum, uint32_t seq, bool push)
> {
> struct tcp_payload_t *payload;
> const uint16_t *check = NULL;
> @@ -268,6 +269,7 @@ static void tcp_data_to_tap(const struct ctx *c, struct tcp_tap_conn *conn,
> payload->th.th_x2 = 0;
> payload->th.th_flags = 0;
> payload->th.ack = 1;
> + payload->th.psh = push;
> iov[TCP_IOV_PAYLOAD].iov_len = dlen + sizeof(struct tcphdr);
> tcp_l2_buf_fill_headers(conn, iov, check, seq, false);
> if (++tcp_payload_used > TCP_FRAMES_MEM - 1)
> @@ -402,11 +404,14 @@ int tcp_buf_data_from_sock(const struct ctx *c, struct tcp_tap_conn *conn)
> seq = conn->seq_to_tap;
> for (i = 0; i < send_bufs; i++) {
> int no_csum = i && i != send_bufs - 1 && tcp_payload_used;
> + bool push = false;
>
> - if (i == send_bufs - 1)
> + if (i == send_bufs - 1) {
> dlen = last_len;
> + push = true;
> + }
>
> - tcp_data_to_tap(c, conn, dlen, no_csum, seq);
> + tcp_data_to_tap(c, conn, dlen, no_csum, seq, push);
> seq += dlen;
> }
>
> diff --git a/tcp_vu.c b/tcp_vu.c
> index a216bb1..fad7065 100644
> --- a/tcp_vu.c
> +++ b/tcp_vu.c
> @@ -289,10 +289,11 @@ static ssize_t tcp_vu_sock_recv(const struct ctx *c,
> * @iov_cnt: Number of entries in @iov
> * @check: Checksum, if already known
> * @no_tcp_csum: Do not set TCP checksum
> + * @push: Set PSH flag, last segment in a batch
> */
> static void tcp_vu_prepare(const struct ctx *c, struct tcp_tap_conn *conn,
> struct iovec *iov, size_t iov_cnt,
> - const uint16_t **check, bool no_tcp_csum)
> + const uint16_t **check, bool no_tcp_csum, bool push)
> {
> const struct flowside *toside = TAPFLOW(conn);
> bool v6 = !(inany_v4(&toside->eaddr) && inany_v4(&toside->oaddr));
> @@ -334,6 +335,7 @@ static void tcp_vu_prepare(const struct ctx *c, struct tcp_tap_conn *conn,
> memset(th, 0, sizeof(*th));
> th->doff = sizeof(*th) / 4;
> th->ack = 1;
> + th->psh = push;
>
> tcp_fill_headers(conn, NULL, ip4h, ip6h, th, &payload,
> *check, conn->seq_to_tap, no_tcp_csum);
> @@ -443,6 +445,7 @@ int tcp_vu_data_from_sock(const struct ctx *c, struct tcp_tap_conn *conn)
> struct iovec *iov = &elem[head[i]].in_sg[0];
> int buf_cnt = head[i + 1] - head[i];
> ssize_t dlen = iov_size(iov, buf_cnt) - hdrlen;
> + bool push = i == head_cnt - 1;
>
> vu_set_vnethdr(vdev, iov->iov_base, buf_cnt);
>
> @@ -451,7 +454,7 @@ int tcp_vu_data_from_sock(const struct ctx *c, struct tcp_tap_conn *conn)
> check = NULL;
> previous_dlen = dlen;
>
> - tcp_vu_prepare(c, conn, iov, buf_cnt, &check, !*c->pcap);
> + tcp_vu_prepare(c, conn, iov, buf_cnt, &check, !*c->pcap, push);
>
> if (*c->pcap) {
> pcap_iov(iov, buf_cnt,
--
David Gibson (he or they) | I'll have my music baroque, and my code
david AT gibson.dropbear.id.au | minimalist, thank you, not the other way
| around.
http://www.ozlabs.org/~dgibson
[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]
^ permalink raw reply [flat|nested] 3+ messages in thread
end of thread, other threads:[~2025-01-22 1:09 UTC | newest]
Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2025-01-20 23:49 [PATCH] tcp: Set PSH flag for last incoming packets in a batch Stefano Brivio
2025-01-20 23:56 ` Stefano Brivio
2025-01-22 1:06 ` David Gibson
Code repositories for project(s) associated with this public inbox
https://passt.top/passt
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for IMAP folder(s).