On Tue, Jan 21, 2025 at 12:49:58AM +0100, Stefano Brivio wrote: > So far we omitted setting PSH flags for inbound traffic altogether: as > we ignore the nature of the data we're sending, we can't conclude that > some data is more or less urgent. This works fine with Linux guests, > as the Linux kernel doesn't do much with it, on input: it will > generally deliver data to the application layer without delay. > > However, with Windows, things change: if we don't set the PSH flag on > interactive inbound traffic, we can expect long delays before the data > is delivered to the application. > > This is very visible with RDP, where packets we send on behalf of the > RDP client are delivered with delays exceeding one second: > > $ tshark -r rdp.pcap -td -Y 'frame.number in { 33170 .. 33173 }' --disable-protocol tls > 33170 0.030296 93.235.154.248 → 88.198.0.164 54 TCP 49012 → 3389 [ACK] Seq=13820 Ack=285229 Win=387968 Len=0 > 33171 0.985412 88.198.0.164 → 93.235.154.248 105 TCP 3389 → 49012 [PSH, ACK] Seq=285229 Ack=13820 Win=63198 Len=51 > 33172 0.030373 93.235.154.248 → 88.198.0.164 54 TCP 49012 → 3389 [ACK] Seq=13820 Ack=285280 Win=387968 Len=0 > 33173 1.383776 88.198.0.164 → 93.235.154.248 424 TCP 3389 → 49012 [PSH, ACK] Seq=285280 Ack=13820 Win=63198 Len=370 > > in this example (packet capture taken by passt), frame #33172 is a > mouse event sent by the RDP client, and frame #33173 is the first > event (display reacting to click) sent back by the server. This > appears as a 1.4 s delay before we get frame #33173. > > If we set PSH, instead: > > $ tshark -r rdp_psh.pcap -td -Y 'frame.number in { 314 .. 317 }' --disable-protocol tls > 314 0.002503 93.235.154.248 → 88.198.0.164 170 TCP 51066 → 3389 [PSH, ACK] Seq=7779 Ack=74047 Win=31872 Len=116 > 315 0.000557 88.198.0.164 → 93.235.154.248 54 TCP 3389 → 51066 [ACK] Seq=79162 Ack=7895 Win=62872 Len=0 > 316 0.012752 93.235.154.248 → 88.198.0.164 170 TCP 51066 → 3389 [PSH, ACK] Seq=7895 Ack=79162 Win=31872 Len=116 > 317 0.011927 88.198.0.164 → 93.235.154.248 107 TCP 3389 → 51066 [PSH, ACK] Seq=79162 Ack=8011 Win=62756 Len=53 > > here, in frame #116, our mouse event is delivered without a delay and > receives a response in approximately 12 ms. > > Set PSH on the last segment for any batch we dequeue from the socket, > that is, set it whenever we know that we might not be sending data to > the same port for a while. Huh, fascinating. > Reported-by: NN708 > Link: https://bugs.passt.top/show_bug.cgi?id=107 > Signed-off-by: Stefano Brivio Reviewed-by: David Gibson > --- > tcp_buf.c | 11 ++++++++--- > tcp_vu.c | 7 +++++-- > 2 files changed, 13 insertions(+), 5 deletions(-) > > diff --git a/tcp_buf.c b/tcp_buf.c > index cbefa42..72d99c5 100644 > --- a/tcp_buf.c > +++ b/tcp_buf.c > @@ -239,9 +239,10 @@ int tcp_buf_send_flag(const struct ctx *c, struct tcp_tap_conn *conn, int flags) > * @dlen: TCP payload length > * @no_csum: Don't compute IPv4 checksum, use the one from previous buffer > * @seq: Sequence number to be sent > + * @push: Set PSH flag, last segment in a batch > */ > static void tcp_data_to_tap(const struct ctx *c, struct tcp_tap_conn *conn, > - ssize_t dlen, int no_csum, uint32_t seq) > + ssize_t dlen, int no_csum, uint32_t seq, bool push) > { > struct tcp_payload_t *payload; > const uint16_t *check = NULL; > @@ -268,6 +269,7 @@ static void tcp_data_to_tap(const struct ctx *c, struct tcp_tap_conn *conn, > payload->th.th_x2 = 0; > payload->th.th_flags = 0; > payload->th.ack = 1; > + payload->th.psh = push; > iov[TCP_IOV_PAYLOAD].iov_len = dlen + sizeof(struct tcphdr); > tcp_l2_buf_fill_headers(conn, iov, check, seq, false); > if (++tcp_payload_used > TCP_FRAMES_MEM - 1) > @@ -402,11 +404,14 @@ int tcp_buf_data_from_sock(const struct ctx *c, struct tcp_tap_conn *conn) > seq = conn->seq_to_tap; > for (i = 0; i < send_bufs; i++) { > int no_csum = i && i != send_bufs - 1 && tcp_payload_used; > + bool push = false; > > - if (i == send_bufs - 1) > + if (i == send_bufs - 1) { > dlen = last_len; > + push = true; > + } > > - tcp_data_to_tap(c, conn, dlen, no_csum, seq); > + tcp_data_to_tap(c, conn, dlen, no_csum, seq, push); > seq += dlen; > } > > diff --git a/tcp_vu.c b/tcp_vu.c > index a216bb1..fad7065 100644 > --- a/tcp_vu.c > +++ b/tcp_vu.c > @@ -289,10 +289,11 @@ static ssize_t tcp_vu_sock_recv(const struct ctx *c, > * @iov_cnt: Number of entries in @iov > * @check: Checksum, if already known > * @no_tcp_csum: Do not set TCP checksum > + * @push: Set PSH flag, last segment in a batch > */ > static void tcp_vu_prepare(const struct ctx *c, struct tcp_tap_conn *conn, > struct iovec *iov, size_t iov_cnt, > - const uint16_t **check, bool no_tcp_csum) > + const uint16_t **check, bool no_tcp_csum, bool push) > { > const struct flowside *toside = TAPFLOW(conn); > bool v6 = !(inany_v4(&toside->eaddr) && inany_v4(&toside->oaddr)); > @@ -334,6 +335,7 @@ static void tcp_vu_prepare(const struct ctx *c, struct tcp_tap_conn *conn, > memset(th, 0, sizeof(*th)); > th->doff = sizeof(*th) / 4; > th->ack = 1; > + th->psh = push; > > tcp_fill_headers(conn, NULL, ip4h, ip6h, th, &payload, > *check, conn->seq_to_tap, no_tcp_csum); > @@ -443,6 +445,7 @@ int tcp_vu_data_from_sock(const struct ctx *c, struct tcp_tap_conn *conn) > struct iovec *iov = &elem[head[i]].in_sg[0]; > int buf_cnt = head[i + 1] - head[i]; > ssize_t dlen = iov_size(iov, buf_cnt) - hdrlen; > + bool push = i == head_cnt - 1; > > vu_set_vnethdr(vdev, iov->iov_base, buf_cnt); > > @@ -451,7 +454,7 @@ int tcp_vu_data_from_sock(const struct ctx *c, struct tcp_tap_conn *conn) > check = NULL; > previous_dlen = dlen; > > - tcp_vu_prepare(c, conn, iov, buf_cnt, &check, !*c->pcap); > + tcp_vu_prepare(c, conn, iov, buf_cnt, &check, !*c->pcap, push); > > if (*c->pcap) { > pcap_iov(iov, buf_cnt, -- David Gibson (he or they) | I'll have my music baroque, and my code david AT gibson.dropbear.id.au | minimalist, thank you, not the other way | around. http://www.ozlabs.org/~dgibson