public inbox for passt-dev@passt.top
 help / color / mirror / code / Atom feed
* [PATCH v2] udp_vu: Discard datagrams when RX virtqueue is not usable
@ 2026-01-07  8:08 Laurent Vivier
  2026-01-07 23:48 ` David Gibson
  0 siblings, 1 reply; 3+ messages in thread
From: Laurent Vivier @ 2026-01-07  8:08 UTC (permalink / raw)
  To: passt-dev; +Cc: Laurent Vivier

During vhost-user device initialization, UDP datagrams may arrive on
listening sockets before the guest has enabled the RX virtqueue.

When this happens, udp_vu_sock_recv() returns 0 without consuming
the datagram from the socket. The caller, udp_sock_fwd(), uses a
while loop with udp_peek_addr() to process pending datagrams. Since
the datagram remains in the socket buffer, udp_peek_addr() keeps
returning data available, causing a busy loop with 100% CPU usage.

To avoid that, we need to discard the data when the virtqueue is not
ready. udp_buf_sock_to_tap() actually does the same as it reads data
with udp_sock_recv() and if fd_tap is not initialized tap_send_frames()
drops them.

Fixes: 28997fcb29b5 ("vhost-user: add vhost-user")
Link: https://bugs.passt.top/show_bug.cgi?id=185
Signed-off-by: Laurent Vivier <lvivier@redhat.com>
---

Notes:
    v2:
      - move recvmsg() from udp_vu_sock_to_tap() to udp_vu_sock_recv()

 udp_vu.c | 32 ++++++++++++++++++++++----------
 1 file changed, 22 insertions(+), 10 deletions(-)

diff --git a/udp_vu.c b/udp_vu.c
index c30dcf97698f..3774d538a2d0 100644
--- a/udp_vu.c
+++ b/udp_vu.c
@@ -65,7 +65,8 @@ static size_t udp_vu_hdrlen(bool v6)
  * @v6:		Set for IPv6 connections
  * @dlen:	Size of received data (output)
  *
- * Return: number of iov entries used to store the datagram
+ * Return: number of iov entries used to store the datagram, 0 if the datagram
+ *         was discarded because the virtqueue is not ready, -1 on error
  */
 static int udp_vu_sock_recv(const struct ctx *c, struct vu_virtq *vq, int s,
 			    bool v6, ssize_t *dlen)
@@ -77,6 +78,15 @@ static int udp_vu_sock_recv(const struct ctx *c, struct vu_virtq *vq, int s,
 
 	ASSERT(!c->no_udp);
 
+	if (!vu_queue_enabled(vq) || !vu_queue_started(vq)) {
+		debug("Got UDP packet, but RX virtqueue not usable yet");
+
+		if (recvmsg(s, &msg, MSG_DONTWAIT) < 0)
+			debug_perror("Failed to discard datagram");
+
+		return 0;
+	}
+
 	/* compute L2 header length */
 	hdrlen = udp_vu_hdrlen(v6);
 
@@ -87,7 +97,7 @@ static int udp_vu_sock_recv(const struct ctx *c, struct vu_virtq *vq, int s,
 			     sizeof(struct virtio_net_hdr_mrg_rxbuf),
 			     NULL);
 	if (iov_cnt == 0)
-		return 0;
+		return -1;
 
 	/* reserve space for the headers */
 	ASSERT(iov_vu[0].iov_len >= MAX(hdrlen, ETH_ZLEN));
@@ -101,7 +111,7 @@ static int udp_vu_sock_recv(const struct ctx *c, struct vu_virtq *vq, int s,
 	*dlen = recvmsg(s, &msg, 0);
 	if (*dlen < 0) {
 		vu_queue_rewind(vq, iov_cnt);
-		return 0;
+		return -1;
 	}
 
 	/* restore the pointer to the headers address */
@@ -216,15 +226,17 @@ void udp_vu_sock_to_tap(const struct ctx *c, int s, int n, flow_sidx_t tosidx)
 		int iov_used;
 
 		iov_used = udp_vu_sock_recv(c, vq, s, v6, &dlen);
-		if (iov_used <= 0)
+		if (iov_used < 0)
 			break;
 
-		udp_vu_prepare(c, toside, dlen);
-		if (*c->pcap) {
-			udp_vu_csum(toside, iov_used);
-			pcap_iov(iov_vu, iov_used,
-				 sizeof(struct virtio_net_hdr_mrg_rxbuf));
+		if (iov_used > 0) {
+			udp_vu_prepare(c, toside, dlen);
+			if (*c->pcap) {
+				udp_vu_csum(toside, iov_used);
+				pcap_iov(iov_vu, iov_used,
+					sizeof(struct virtio_net_hdr_mrg_rxbuf));
+			}
+			vu_flush(vdev, vq, elem, iov_used);
 		}
-		vu_flush(vdev, vq, elem, iov_used);
 	}
 }
-- 
2.52.0


^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [PATCH v2] udp_vu: Discard datagrams when RX virtqueue is not usable
  2026-01-07  8:08 [PATCH v2] udp_vu: Discard datagrams when RX virtqueue is not usable Laurent Vivier
@ 2026-01-07 23:48 ` David Gibson
  2026-01-08  7:13   ` Laurent Vivier
  0 siblings, 1 reply; 3+ messages in thread
From: David Gibson @ 2026-01-07 23:48 UTC (permalink / raw)
  To: Laurent Vivier; +Cc: passt-dev

[-- Attachment #1: Type: text/plain, Size: 4357 bytes --]

On Wed, Jan 07, 2026 at 09:08:09AM +0100, Laurent Vivier wrote:
> During vhost-user device initialization, UDP datagrams may arrive on
> listening sockets before the guest has enabled the RX virtqueue.
> 
> When this happens, udp_vu_sock_recv() returns 0 without consuming
> the datagram from the socket. The caller, udp_sock_fwd(), uses a
> while loop with udp_peek_addr() to process pending datagrams. Since
> the datagram remains in the socket buffer, udp_peek_addr() keeps
> returning data available, causing a busy loop with 100% CPU usage.
> 
> To avoid that, we need to discard the data when the virtqueue is not
> ready. udp_buf_sock_to_tap() actually does the same as it reads data
> with udp_sock_recv() and if fd_tap is not initialized tap_send_frames()
> drops them.
> 
> Fixes: 28997fcb29b5 ("vhost-user: add vhost-user")
> Link: https://bugs.passt.top/show_bug.cgi?id=185
> Signed-off-by: Laurent Vivier <lvivier@redhat.com>

Reviewed-by: David Gibson <david@gibson.dropbear.id.au>

Although one possible nit is noted below.

> ---
> 
> Notes:
>     v2:
>       - move recvmsg() from udp_vu_sock_to_tap() to udp_vu_sock_recv()
> 
>  udp_vu.c | 32 ++++++++++++++++++++++----------
>  1 file changed, 22 insertions(+), 10 deletions(-)
> 
> diff --git a/udp_vu.c b/udp_vu.c
> index c30dcf97698f..3774d538a2d0 100644
> --- a/udp_vu.c
> +++ b/udp_vu.c
> @@ -65,7 +65,8 @@ static size_t udp_vu_hdrlen(bool v6)
>   * @v6:		Set for IPv6 connections
>   * @dlen:	Size of received data (output)
>   *
> - * Return: number of iov entries used to store the datagram
> + * Return: number of iov entries used to store the datagram, 0 if the datagram
> + *         was discarded because the virtqueue is not ready, -1 on error
>   */
>  static int udp_vu_sock_recv(const struct ctx *c, struct vu_virtq *vq, int s,
>  			    bool v6, ssize_t *dlen)
> @@ -77,6 +78,15 @@ static int udp_vu_sock_recv(const struct ctx *c, struct vu_virtq *vq, int s,
>  
>  	ASSERT(!c->no_udp);
>  
> +	if (!vu_queue_enabled(vq) || !vu_queue_started(vq)) {
> +		debug("Got UDP packet, but RX virtqueue not usable yet");
> +
> +		if (recvmsg(s, &msg, MSG_DONTWAIT) < 0)

You use MSG_DONTWAIT here, but you don't on the normal path.  I guess
it shouldn't make a difference, since we've come from epoll so we know
something is waiting for us.  But I think we want to make the paths
look as identical as we can from the point of view of the socket side,
and this makes it a bit less obvious.

> +			debug_perror("Failed to discard datagram");

This also isn't really accurate in the case of EAGAIN / EWOULDBLOCK.

> +
> +		return 0;
> +	}
> +
>  	/* compute L2 header length */
>  	hdrlen = udp_vu_hdrlen(v6);
>  
> @@ -87,7 +97,7 @@ static int udp_vu_sock_recv(const struct ctx *c, struct vu_virtq *vq, int s,
>  			     sizeof(struct virtio_net_hdr_mrg_rxbuf),
>  			     NULL);
>  	if (iov_cnt == 0)
> -		return 0;
> +		return -1;
>  
>  	/* reserve space for the headers */
>  	ASSERT(iov_vu[0].iov_len >= MAX(hdrlen, ETH_ZLEN));
> @@ -101,7 +111,7 @@ static int udp_vu_sock_recv(const struct ctx *c, struct vu_virtq *vq, int s,
>  	*dlen = recvmsg(s, &msg, 0);
>  	if (*dlen < 0) {
>  		vu_queue_rewind(vq, iov_cnt);
> -		return 0;
> +		return -1;
>  	}
>  
>  	/* restore the pointer to the headers address */
> @@ -216,15 +226,17 @@ void udp_vu_sock_to_tap(const struct ctx *c, int s, int n, flow_sidx_t tosidx)
>  		int iov_used;
>  
>  		iov_used = udp_vu_sock_recv(c, vq, s, v6, &dlen);
> -		if (iov_used <= 0)
> +		if (iov_used < 0)
>  			break;
>  
> -		udp_vu_prepare(c, toside, dlen);
> -		if (*c->pcap) {
> -			udp_vu_csum(toside, iov_used);
> -			pcap_iov(iov_vu, iov_used,
> -				 sizeof(struct virtio_net_hdr_mrg_rxbuf));
> +		if (iov_used > 0) {
> +			udp_vu_prepare(c, toside, dlen);
> +			if (*c->pcap) {
> +				udp_vu_csum(toside, iov_used);
> +				pcap_iov(iov_vu, iov_used,
> +					sizeof(struct virtio_net_hdr_mrg_rxbuf));
> +			}
> +			vu_flush(vdev, vq, elem, iov_used);
>  		}
> -		vu_flush(vdev, vq, elem, iov_used);
>  	}
>  }
> -- 
> 2.52.0
> 

-- 
David Gibson (he or they)	| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you, not the other way
				| around.
http://www.ozlabs.org/~dgibson

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [PATCH v2] udp_vu: Discard datagrams when RX virtqueue is not usable
  2026-01-07 23:48 ` David Gibson
@ 2026-01-08  7:13   ` Laurent Vivier
  0 siblings, 0 replies; 3+ messages in thread
From: Laurent Vivier @ 2026-01-08  7:13 UTC (permalink / raw)
  To: David Gibson; +Cc: passt-dev

On 1/8/26 00:48, David Gibson wrote:
> On Wed, Jan 07, 2026 at 09:08:09AM +0100, Laurent Vivier wrote:
>> During vhost-user device initialization, UDP datagrams may arrive on
>> listening sockets before the guest has enabled the RX virtqueue.
>>
>> When this happens, udp_vu_sock_recv() returns 0 without consuming
>> the datagram from the socket. The caller, udp_sock_fwd(), uses a
>> while loop with udp_peek_addr() to process pending datagrams. Since
>> the datagram remains in the socket buffer, udp_peek_addr() keeps
>> returning data available, causing a busy loop with 100% CPU usage.
>>
>> To avoid that, we need to discard the data when the virtqueue is not
>> ready. udp_buf_sock_to_tap() actually does the same as it reads data
>> with udp_sock_recv() and if fd_tap is not initialized tap_send_frames()
>> drops them.
>>
>> Fixes: 28997fcb29b5 ("vhost-user: add vhost-user")
>> Link: https://bugs.passt.top/show_bug.cgi?id=185
>> Signed-off-by: Laurent Vivier <lvivier@redhat.com>
> 
> Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
> 
> Although one possible nit noted..
> 
>> ---
>>
>> Notes:
>>      v2:
>>        - move recvmsg() from udp_vu_sock_to_tap() to udp_vu_sock_recv()
>>
>>   udp_vu.c | 32 ++++++++++++++++++++++----------
>>   1 file changed, 22 insertions(+), 10 deletions(-)
>>
>> diff --git a/udp_vu.c b/udp_vu.c
>> index c30dcf97698f..3774d538a2d0 100644
>> --- a/udp_vu.c
>> +++ b/udp_vu.c
>> @@ -65,7 +65,8 @@ static size_t udp_vu_hdrlen(bool v6)
>>    * @v6:		Set for IPv6 connections
>>    * @dlen:	Size of received data (output)
>>    *
>> - * Return: number of iov entries used to store the datagram
>> + * Return: number of iov entries used to store the datagram, 0 if the datagram
>> + *         was discarded because the virtqueue is not ready, -1 on error
>>    */
>>   static int udp_vu_sock_recv(const struct ctx *c, struct vu_virtq *vq, int s,
>>   			    bool v6, ssize_t *dlen)
>> @@ -77,6 +78,15 @@ static int udp_vu_sock_recv(const struct ctx *c, struct vu_virtq *vq, int s,
>>   
>>   	ASSERT(!c->no_udp);
>>   
>> +	if (!vu_queue_enabled(vq) || !vu_queue_started(vq)) {
>> +		debug("Got UDP packet, but RX virtqueue not usable yet");
>> +
>> +		if (recvmsg(s, &msg, MSG_DONTWAIT) < 0)
> 
> You use MSG_DONTWAIT here, but you don't on the normal path.  I guess
> it shouldn't make a difference, since we've come from epoll so we know
> something is waiting for us.  But I think we want to make the paths
> look as identical as we can from the point of view of the socket side,
> and this makes it a bit less obvious.

This is actually consistent with the existing discard pattern in udp_sock_fwd(). When 
udp_sock_fwd() needs to discard a datagram, it does exactly the same thing:

   if (discard) {
   	struct msghdr msg = { 0 };

   	if (recvmsg(s, &msg, MSG_DONTWAIT) < 0)
   		debug_perror("Failed to discard datagram");
   }

udp_sock_fwd() loops on udp_peek_addr() and calls udp_vu_sock_to_tap() (which in turn
calls udp_vu_sock_recv()). If something has to be discarded, it calls recvmsg(s, &msg, MSG_DONTWAIT).

Thanks,
Laurent
> 
>> +			debug_perror("Failed to discard datagram");
> 
> This also isn't really accurate in the case of EAGAIN / EWOULDBLOCK.
> 
>> +
>> +		return 0;
>> +	}
>> +
>>   	/* compute L2 header length */
>>   	hdrlen = udp_vu_hdrlen(v6);
>>   
>> @@ -87,7 +97,7 @@ static int udp_vu_sock_recv(const struct ctx *c, struct vu_virtq *vq, int s,
>>   			     sizeof(struct virtio_net_hdr_mrg_rxbuf),
>>   			     NULL);
>>   	if (iov_cnt == 0)
>> -		return 0;
>> +		return -1;
>>   
>>   	/* reserve space for the headers */
>>   	ASSERT(iov_vu[0].iov_len >= MAX(hdrlen, ETH_ZLEN));
>> @@ -101,7 +111,7 @@ static int udp_vu_sock_recv(const struct ctx *c, struct vu_virtq *vq, int s,
>>   	*dlen = recvmsg(s, &msg, 0);
>>   	if (*dlen < 0) {
>>   		vu_queue_rewind(vq, iov_cnt);
>> -		return 0;
>> +		return -1;
>>   	}
>>   
>>   	/* restore the pointer to the headers address */
>> @@ -216,15 +226,17 @@ void udp_vu_sock_to_tap(const struct ctx *c, int s, int n, flow_sidx_t tosidx)
>>   		int iov_used;
>>   
>>   		iov_used = udp_vu_sock_recv(c, vq, s, v6, &dlen);
>> -		if (iov_used <= 0)
>> +		if (iov_used < 0)
>>   			break;
>>   
>> -		udp_vu_prepare(c, toside, dlen);
>> -		if (*c->pcap) {
>> -			udp_vu_csum(toside, iov_used);
>> -			pcap_iov(iov_vu, iov_used,
>> -				 sizeof(struct virtio_net_hdr_mrg_rxbuf));
>> +		if (iov_used > 0) {
>> +			udp_vu_prepare(c, toside, dlen);
>> +			if (*c->pcap) {
>> +				udp_vu_csum(toside, iov_used);
>> +				pcap_iov(iov_vu, iov_used,
>> +					sizeof(struct virtio_net_hdr_mrg_rxbuf));
>> +			}
>> +			vu_flush(vdev, vq, elem, iov_used);
>>   		}
>> -		vu_flush(vdev, vq, elem, iov_used);
>>   	}
>>   }
>> -- 
>> 2.52.0
>>
> 


^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2026-01-08  7:13 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2026-01-07  8:08 [PATCH v2] udp_vu: Discard datagrams when RX virtqueue is not usable Laurent Vivier
2026-01-07 23:48 ` David Gibson
2026-01-08  7:13   ` Laurent Vivier

Code repositories for project(s) associated with this public inbox

	https://passt.top/passt

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for IMAP folder(s).