From mboxrd@z Thu Jan 1 00:00:00 1970 Authentication-Results: passt.top; dmarc=none (p=none dis=none) header.from=gibson.dropbear.id.au Authentication-Results: passt.top; dkim=pass (2048-bit key; secure) header.d=gibson.dropbear.id.au header.i=@gibson.dropbear.id.au header.a=rsa-sha256 header.s=202506 header.b=f1fWqRox; dkim-atps=neutral Received: from mail.ozlabs.org (mail.ozlabs.org [IPv6:2404:9400:2221:ea00::3]) by passt.top (Postfix) with ESMTPS id 7315E5A0271 for ; Thu, 24 Jul 2025 03:20:52 +0200 (CEST) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gibson.dropbear.id.au; s=202506; t=1753319878; bh=IqH3RgsHhS8VDiDPsdSU4Q9VOKO1a81FaaP1uB/z6XE=; h=Date:From:To:Cc:Subject:References:In-Reply-To:From; b=f1fWqRoxIGs65LyQkMjEqjzmcrShhLr805EJZ5wUVJWTVQ1YGh1rTbyalwmfT9YFK pFUqRo0q70Tc+1GWAEqsskERI4z2Wuj5DPZghTZPDOLaXZPgEbLOqWAxt4ADVnrVDC 3co9XrIA7mT65ctT3nfYOF3u/qTnqZpTtvT554VJNsaakwazY7HiIcPXcm9+BbxhbR AbZ25qvIpagRxCcy95nBFE0ZahLJC4hfTJYcLhYN3Gs7HuKdGS5jfUOEJuN5+hv8m3 9a5JPtpa0GT4DDWb9cIT9ThldOUiHF/tTUMHc5lGjY8K7GbqbyGMNRyi2f19Kkn+DH GQXsbeqZtPA6g== Received: by gandalf.ozlabs.org (Postfix, from userid 1007) id 4bnY5y575Cz4x8Y; Thu, 24 Jul 2025 11:17:58 +1000 (AEST) Date: Thu, 24 Jul 2025 11:20:44 +1000 From: David Gibson To: Eugenio =?iso-8859-1?Q?P=E9rez?= Subject: Re: [RFC v2 10/11] tap: add poll(2) to used_idx Message-ID: References: <20250709174748.3514693-1-eperezma@redhat.com> <20250709174748.3514693-11-eperezma@redhat.com> MIME-Version: 1.0 Content-Type: multipart/signed; micalg=pgp-sha512; protocol="application/pgp-signature"; boundary="/eVW+Z7dMhb4Si/R" Content-Disposition: inline In-Reply-To: <20250709174748.3514693-11-eperezma@redhat.com> Message-ID-Hash: AIJ3HHGXT44LJJRRZYAROHL67J44NAOU X-Message-ID-Hash: AIJ3HHGXT44LJJRRZYAROHL67J44NAOU X-MailFrom: dgibson@gandalf.ozlabs.org X-Mailman-Rule-Misses: dmarc-mitigation; no-senders; approved; emergency; loop; banned-address; member-moderation; nonmember-moderation; administrivia; implicit-dest; max-recipients; max-size; news-moderation; no-subject; digests; suspicious-header CC: passt-dev@passt.top, jasowang@redhat.com X-Mailman-Version: 3.3.8 Precedence: list List-Id: Development discussion and patches for passt Archived-At: Archived-At: List-Archive: List-Archive: List-Help: List-Owner: List-Post: List-Subscribe: List-Unsubscribe: --/eVW+Z7dMhb4Si/R Content-Type: text/plain; charset=iso-8859-1 Content-Disposition: inline Content-Transfer-Encoding: quoted-printable On Wed, Jul 09, 2025 at 07:47:47PM +0200, Eugenio P=E9rez wrote: > From ~13Gbit/s to ~11.5Gbit/s. Again, I really don't know what you're comparing to what here. >=20 > TODO: Maybe we can reuse epoll for this, not needing to introduce a new > syscall. I really hope so. >=20 > Signed-off-by: Eugenio P=E9rez > --- > tap.c | 59 +++++++++++++++++++++++++++++++++++++++++++++---------- > tap.h | 2 +- > tcp_buf.c | 6 +++--- > 3 files changed, 53 insertions(+), 14 deletions(-) >=20 > diff --git a/tap.c b/tap.c > index 55357e3..93a8c12 100644 > --- a/tap.c > +++ b/tap.c > @@ -19,6 +19,7 @@ > #include > #include > #include > +#include > #include > #include > #include > @@ -120,7 +121,7 @@ static PACKET_POOL_NOINIT(pool_tap6, TAP_MSGS_IP6, pk= t_buf); > #define VHOST_NDESCS (PKT_BUF_BYTES / 65520) > static_assert(!(VHOST_NDESCS & (VHOST_NDESCS - 1)), > "Number of vhost descs must be a power of two by standard"); > -static struct { > +static struct vhost_virtqueue { > /* Descriptor index we're using. This is not the same as avail idx in > * split: this takes into account the chained descs */ > uint16_t vring_idx; > @@ -472,26 +473,63 @@ static void vhost_kick(struct vring_used *used, int= kick_fd) { > eventfd_write(kick_fd, 1); > } >=20 > +/* > + * #syscalls:pasta read poll > + */ > +static uint16_t used_idx(struct vhost_virtqueue *vq, > + struct vring_avail *avail, > + const struct vring_used *used, int pollfd) > +{ > + struct pollfd fds =3D { .fd =3D pollfd, .events =3D POLLIN }; > + int r; > + > + if (vq->shadow_used_idx =3D=3D vq->last_used_idx) > + vq->shadow_used_idx =3D le16toh(used->idx); > + > + if (vq->shadow_used_idx !=3D vq->last_used_idx || pollfd < 0) > + return vq->shadow_used_idx; > + > + avail->flags &=3D ~htole16(1ULL << VRING_AVAIL_F_NO_INTERRUPT); > + /* trusting syscall for smp_wb() */ > + r =3D read(pollfd, (uint64_t[]){0}, sizeof(uint64_t)); > + assert((r < 0 && errno =3D=3D EAGAIN) || r =3D=3D 8); > + > + /* Another oportunity before syscalls */ > + vq->shadow_used_idx =3D le16toh(used->idx); > + if (vq->shadow_used_idx !=3D vq->last_used_idx) { > + return vqs->shadow_used_idx; > + } > + > + r =3D poll(&fds, 1, -1); > + assert (0 < r); > + avail->flags |=3D htole16(1ULL << VRING_AVAIL_F_NO_INTERRUPT); > + vq->shadow_used_idx =3D le16toh(used->idx); > + return vq->shadow_used_idx; I don't understand what this is accomplishing. It seems like it's blocking waiting for a buffer to be freed, which seems like exactly what we don't want to do. > +} > + > /* n =3D target */ > -void tap_free_old_xmit(size_t n) > +size_t tap_free_old_xmit(const struct ctx *c, size_t n) > { > size_t r =3D 0; > + int pollfd =3D (n =3D=3D (size_t)-1) ? -1 : c->vq[1].call_fd; >=20 > while (r < n) { > - uint16_t used_idx =3D vqs[1].last_used_idx; > - if (vqs[1].shadow_used_idx =3D=3D used_idx) { > - vqs[1].shadow_used_idx =3D le16toh(*(volatile uint16_t*)&vring_= used_1.used.idx); > - > - if (vqs[1].shadow_used_idx =3D=3D used_idx) > - continue; > + uint16_t last_used =3D vqs[1].last_used_idx; > + if (used_idx(&vqs[1], &vring_avail_1.avail, &vring_used_1.used, pollfd= ) =3D=3D last_used) { > + assert(pollfd =3D=3D -1); > + return r; > } >=20 > + /* Only get used array entries after they have been exposed by vhost. = */ > + smp_rmb(); > /* assert in-order */ > - assert(vring_used_1.used.ring[used_idx % VHOST_NDESCS].id =3D=3D vring= _avail_1.avail.ring[used_idx % VHOST_NDESCS]); > - vqs[1].num_free +=3D vqs[1].ndescs[used_idx % VHOST_NDESCS]; > + assert(vring_used_1.used.ring[last_used % VHOST_NDESCS].id =3D=3D vrin= g_avail_1.avail.ring[last_used % VHOST_NDESCS]); > + vqs[1].num_free +=3D vqs[1].ndescs[last_used % VHOST_NDESCS]; > vqs[1].last_used_idx++; > r++; > } > + > + return r; > } >=20 > /** > @@ -1687,6 +1725,7 @@ static int tap_ns_tun(void *arg) > if (rc < 0) > die_perror("Failed to add call eventfd to epoll"); > } > + fprintf(stderr, "[eperezma %s:%d][i=3D%d][call_fd=3D%d]\n", __func__, = __LINE__, i, file.fd); > c->vq[i].call_fd =3D file.fd; >=20 > file.fd =3D eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC); > diff --git a/tap.h b/tap.h > index 7ca0fb0..7004116 100644 > --- a/tap.h > +++ b/tap.h > @@ -112,7 +112,7 @@ void tap_icmp6_send(const struct ctx *c, > const struct in6_addr *src, const struct in6_addr *dst, > const void *in, size_t l4len); > void tap_send_single(const struct ctx *c, const void *data, size_t l2len= , bool vhost); > -void tap_free_old_xmit(size_t n); > +size_t tap_free_old_xmit(const struct ctx *c, size_t n); > size_t tap_send_frames(const struct ctx *c, const struct iovec *iov, > size_t bufs_per_frame, size_t nframes, bool vhost); > void eth_update_mac(struct ethhdr *eh, > diff --git a/tcp_buf.c b/tcp_buf.c > index 0437120..f74d22d 100644 > --- a/tcp_buf.c > +++ b/tcp_buf.c > @@ -137,10 +137,10 @@ static void tcp_revert_seq(const struct ctx *c, str= uct tcp_tap_conn **conns, > } > } >=20 > -static void tcp_buf_free_old_tap_xmit(void) > +static void tcp_buf_free_old_tap_xmit(const struct ctx *c) > { > while (tcp_payload_tap_used) { > - tap_free_old_xmit(tcp_payload_tap_used); > + tap_free_old_xmit(c, tcp_payload_tap_used); >=20 > tcp_payload_tap_used =3D 0; > tcp_payload_sock_used =3D 0; > @@ -162,7 +162,7 @@ void tcp_payload_flush(const struct ctx *c) > tcp_payload_sock_used - m); > } > tcp_payload_tap_used +=3D m; > - tcp_buf_free_old_tap_xmit(); > + tcp_buf_free_old_tap_xmit(c); > } >=20 > /** --=20 David Gibson (he or they) | I'll have my music baroque, and my code david AT gibson.dropbear.id.au | minimalist, thank you, not the other way | around. http://www.ozlabs.org/~dgibson --/eVW+Z7dMhb4Si/R Content-Type: application/pgp-signature; name=signature.asc -----BEGIN PGP SIGNATURE----- iQIzBAEBCgAdFiEEO+dNsU4E3yXUXRK2zQJF27ox2GcFAmiBimsACgkQzQJF27ox 2Gc+bhAAlopAyOms+VzBc2+JT96wrFLeICaR1sGX5uRSsyiZ+g7IuO0FGRtMn/G2 6kokCho6/l65tTK5kjuzk1xTEOvmowrllaGPLW+C9oSd/8lUZhoBq12msLF/13Rs ABAK7CH6uT1oEH7xaGnQy8Mb6nHWH5pA5rSaKkl3trKNRQvsslDpyzEH8dXUuTSr O4336cn9haor4QcviVHy4i7x+G1PRoyefUBm1H8uAsbravauF/U9AGzJH6TdvW7W yXE3yfxK1qjTwp9uw8T41n/zurvod/jIGPK9RyQO30gsJsNqUHmcEFATJAqOfBI8 deA90v5wCCw6phDigpayNd4/8NtjnBIae5+B6Wk3vK/BdNr+1rOaSnmk+Y8ZML0k hYJx2SNdF4phmc2G4G1GczUvNjCfNuq+bFN785yfR/FtwmZVREPc1L6eVK6IFmWQ DqBI6DVwUgJQ3Nw4evcidz6tToBgucx5cRORjW4IvCmE35PvXsW8jSokeHkQAdyZ U+rlMqT5fvrUTO7WielQ1UqqMCW6v1hfbEL1SODfZUsaQ3d9K76R6J6IlwYWIESu HDcEeVzOZH33XZ5tsJtduK9UHB/F/7gY9Bzd4ZBKkJnbIILnuNN8piy1OkSve7hp nMW4ZWyzyo9a+lg2PWnFnkMIkAHngfQqRRIGSSwT/f6VAeH2obs= =1Es5 -----END PGP SIGNATURE----- --/eVW+Z7dMhb4Si/R--