From mboxrd@z Thu Jan 1 00:00:00 1970 Received: from mail.ozlabs.org (gandalf.ozlabs.org [150.107.74.76]) by passt.top (Postfix) with ESMTPS id DC4BF5A0319 for ; Wed, 26 Jun 2024 02:34:16 +0200 (CEST) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gibson.dropbear.id.au; s=202312; t=1719362053; bh=NPuWUUX34wPMRYPo2oUZymM4+63x8H5vS3aSMQ02qIo=; h=Date:From:To:Cc:Subject:References:In-Reply-To:From; b=PKgxoCB0fYeNgA/7Kd5wHATVLtos/yHMSo4d6zO8wUFck43dsrP+EW8jMjZJnrAzk YG8VdBYPUPlqQrylVG87NQNZpv8phrkBI+BkLyaMOD+lKF97vdjkpXE9B7Wq5lk6m2 xB5ZGyT214KbMm8oEd3+HBPMiUMz6VGB6250BVYhmBC8GKl5BgPujCnjUGhbjbgRwA INqT2pdnwcjgI//BRGfB26hG5u0qVcEKAZG0GUXV3fUoHPV2ojPL5deSecYcJMN3QQ p+NDj3SymukeaS0eGdf/TljA57/MpzEfd/r2e4PcEaKCOm0vAYt/uibRanwYLI+76K xrQUTUFCsA86Q== Received: by gandalf.ozlabs.org (Postfix, from userid 1007) id 4W82ks25z4z4w2Q; Wed, 26 Jun 2024 10:34:13 +1000 (AEST) Date: Wed, 26 Jun 2024 10:23:49 +1000 From: David Gibson To: Stefano Brivio Subject: Re: [PATCH v6 03/26] tcp, flow: Remove redundant information, repack connection structures Message-ID: References: <20240614061348.3814736-1-david@gibson.dropbear.id.au> <20240614061348.3814736-4-david@gibson.dropbear.id.au> <20240626002505.12f2b3b2@elisabeth> MIME-Version: 1.0 Content-Type: multipart/signed; micalg=pgp-sha256; protocol="application/pgp-signature"; boundary="dj9Fdx3wesXZ/Ca2" Content-Disposition: inline In-Reply-To: <20240626002505.12f2b3b2@elisabeth> Message-ID-Hash: M6OKGUZ5F5ZFZOQHEBCM4I4VPZF5RGQA X-Message-ID-Hash: M6OKGUZ5F5ZFZOQHEBCM4I4VPZF5RGQA X-MailFrom: dgibson@gandalf.ozlabs.org X-Mailman-Rule-Misses: dmarc-mitigation; no-senders; approved; emergency; loop; banned-address; member-moderation; nonmember-moderation; administrivia; implicit-dest; max-recipients; max-size; news-moderation; no-subject; digests; suspicious-header CC: passt-dev@passt.top, jmaloy@redhat.com X-Mailman-Version: 3.3.8 Precedence: list List-Id: Development discussion and patches for passt Archived-At: Archived-At: List-Archive: List-Archive: List-Help: List-Owner: List-Post: List-Subscribe: List-Unsubscribe: --dj9Fdx3wesXZ/Ca2 Content-Type: text/plain; charset=us-ascii Content-Disposition: inline Content-Transfer-Encoding: quoted-printable On Wed, Jun 26, 2024 at 12:25:05AM +0200, Stefano Brivio wrote: > On Fri, 14 Jun 2024 16:13:25 +1000 > David Gibson wrote: >=20 > > Some information we explicitly store in the TCP connection is now > > duplicated in the common flow structure. Access it from there instead,= and > > remove it from the TCP specific structure. With that done we can reor= der > > both the "tap" and "splice" TCP structures a bit to get better packing = for > > the new combined flow table entries. > >=20 > > Signed-off-by: David Gibson > > --- > > tcp.c | 52 ++++++++++++++++++++++++++------------------------ > > tcp_conn.h | 40 +++++++++++++++----------------------- > > tcp_internal.h | 6 +++++- > > 3 files changed, 47 insertions(+), 51 deletions(-) > >=20 > > diff --git a/tcp.c b/tcp.c > > index c6cd0c72..30ad3dd4 100644 > > --- a/tcp.c > > +++ b/tcp.c > > @@ -333,8 +333,6 @@ > > =20 > > #define ACK_IF_NEEDED 0 /* See tcp_send_flag() */ > > =20 > > -#define TAPSIDE(conn_) ((conn_)->f.pif[1] =3D=3D PIF_TAP) > > - > > #define CONN_IS_CLOSING(conn) \ > > (((conn)->events & ESTABLISHED) && \ > > ((conn)->events & (SOCK_FIN_RCVD | TAP_FIN_RCVD))) > > @@ -635,10 +633,11 @@ void conn_event_do(const struct ctx *c, struct tc= p_tap_conn *conn, > > */ > > static int tcp_rtt_dst_low(const struct tcp_tap_conn *conn) > > { > > + const struct flowside *tapside =3D TAPFLOW(conn); > > int i; > > =20 > > for (i =3D 0; i < LOW_RTT_TABLE_SIZE; i++) > > - if (inany_equals(&conn->faddr, low_rtt_dst + i)) > > + if (inany_equals(&tapside->faddr, low_rtt_dst + i)) > > return 1; > > =20 > > return 0; > > @@ -653,6 +652,7 @@ static void tcp_rtt_dst_check(const struct tcp_tap_= conn *conn, > > const struct tcp_info *tinfo) > > { > > #ifdef HAS_MIN_RTT > > + const struct flowside *tapside =3D TAPFLOW(conn); > > int i, hole =3D -1; > > =20 > > if (!tinfo->tcpi_min_rtt || > > @@ -660,7 +660,7 @@ static void tcp_rtt_dst_check(const struct tcp_tap_= conn *conn, > > return; > > =20 > > for (i =3D 0; i < LOW_RTT_TABLE_SIZE; i++) { > > - if (inany_equals(&conn->faddr, low_rtt_dst + i)) > > + if (inany_equals(&tapside->faddr, low_rtt_dst + i)) > > return; > > if (hole =3D=3D -1 && IN6_IS_ADDR_UNSPECIFIED(low_rtt_dst + i)) > > hole =3D i; > > @@ -672,7 +672,7 @@ static void tcp_rtt_dst_check(const struct tcp_tap_= conn *conn, > > if (hole =3D=3D -1) > > return; > > =20 > > - low_rtt_dst[hole++] =3D conn->faddr; > > + low_rtt_dst[hole++] =3D tapside->faddr; > > if (hole =3D=3D LOW_RTT_TABLE_SIZE) > > hole =3D 0; > > inany_from_af(low_rtt_dst + hole, AF_INET6, &in6addr_any); > > @@ -827,8 +827,10 @@ static int tcp_hash_match(const struct tcp_tap_con= n *conn, > > const union inany_addr *faddr, > > in_port_t eport, in_port_t fport) > > { > > - if (inany_equals(&conn->faddr, faddr) && > > - conn->eport =3D=3D eport && conn->fport =3D=3D fport) > > + const struct flowside *tapside =3D TAPFLOW(conn); > > + > > + if (inany_equals(&tapside->faddr, faddr) && > > + tapside->eport =3D=3D eport && tapside->fport =3D=3D fport) > > return 1; > > =20 > > return 0; > > @@ -862,7 +864,10 @@ static uint64_t tcp_hash(const struct ctx *c, cons= t union inany_addr *faddr, > > static uint64_t tcp_conn_hash(const struct ctx *c, > > const struct tcp_tap_conn *conn) > > { > > - return tcp_hash(c, &conn->faddr, conn->eport, conn->fport); > > + const struct flowside *tapside =3D TAPFLOW(conn); > > + > > + return tcp_hash(c, &tapside->faddr, tapside->eport, > > + tapside->fport); > > } > > =20 > > /** > > @@ -998,10 +1003,12 @@ void tcp_defer_handler(struct ctx *c) > > * @seq: Sequence number > > */ > > static void tcp_fill_header(struct tcphdr *th, > > - const struct tcp_tap_conn *conn, uint32_t seq) > > + const struct tcp_tap_conn *conn, uint32_t seq) > > { > > - th->source =3D htons(conn->fport); > > - th->dest =3D htons(conn->eport); > > + const struct flowside *tapside =3D TAPFLOW(conn); > > + > > + th->source =3D htons(tapside->fport); > > + th->dest =3D htons(tapside->eport); > > th->seq =3D htonl(seq); > > th->ack_seq =3D htonl(conn->seq_ack_to_tap); > > if (conn->events & ESTABLISHED) { > > @@ -1033,7 +1040,8 @@ static size_t tcp_fill_headers4(const struct ctx = *c, > > size_t dlen, const uint16_t *check, > > uint32_t seq) > > { > > - const struct in_addr *a4 =3D inany_v4(&conn->faddr); > > + const struct flowside *tapside =3D TAPFLOW(conn); > > + const struct in_addr *a4 =3D inany_v4(&tapside->faddr); > > size_t l4len =3D dlen + sizeof(*th); > > size_t l3len =3D l4len + sizeof(*iph); > > =20 > > @@ -1075,10 +1083,11 @@ static size_t tcp_fill_headers6(const struct ct= x *c, > > struct ipv6hdr *ip6h, struct tcphdr *th, > > size_t dlen, uint32_t seq) > > { > > + const struct flowside *tapside =3D TAPFLOW(conn); > > size_t l4len =3D dlen + sizeof(*th); > > =20 > > ip6h->payload_len =3D htons(l4len); > > - ip6h->saddr =3D conn->faddr.a6; > > + ip6h->saddr =3D tapside->faddr.a6; > > if (IN6_IS_ADDR_LINKLOCAL(&ip6h->saddr)) > > ip6h->daddr =3D c->ip6.addr_ll_seen; > > else > > @@ -1117,7 +1126,8 @@ size_t tcp_l2_buf_fill_headers(const struct ctx *= c, > > struct iovec *iov, size_t dlen, > > const uint16_t *check, uint32_t seq) > > { > > - const struct in_addr *a4 =3D inany_v4(&conn->faddr); > > + const struct flowside *tapside =3D TAPFLOW(conn); > > + const struct in_addr *a4 =3D inany_v4(&tapside->faddr); > > =20 > > if (a4) { > > return tcp_fill_headers4(c, conn, iov[TCP_IOV_TAP].iov_base, > > @@ -1420,6 +1430,7 @@ static void tcp_seq_init(const struct ctx *c, str= uct tcp_tap_conn *conn, > > const struct timespec *now) > > { > > struct siphash_state state =3D SIPHASH_INIT(c->hash_secret); > > + const struct flowside *tapside =3D TAPFLOW(conn); > > union inany_addr aany; > > uint64_t hash; > > uint32_t ns; > > @@ -1429,10 +1440,10 @@ static void tcp_seq_init(const struct ctx *c, s= truct tcp_tap_conn *conn, > > else > > inany_from_af(&aany, AF_INET6, &c->ip6.addr); > > =20 > > - inany_siphash_feed(&state, &conn->faddr); > > + inany_siphash_feed(&state, &tapside->faddr); > > inany_siphash_feed(&state, &aany); > > hash =3D siphash_final(&state, 36, > > - (uint64_t)conn->fport << 16 | conn->eport); > > + (uint64_t)tapside->fport << 16 | tapside->eport); > > =20 > > /* 32ns ticks, overflows 32 bits every 137s */ > > ns =3D (now->tv_sec * 1000000000 + now->tv_nsec) >> 5; > > @@ -1707,11 +1718,6 @@ static void tcp_conn_from_tap(struct ctx *c, sa_= family_t af, > > if (!(conn->wnd_from_tap =3D (htons(th->window) >> conn->ws_from_tap)= )) > > conn->wnd_from_tap =3D 1; > > =20 > > - inany_from_af(&conn->faddr, af, daddr); > > - > > - conn->fport =3D dstport; > > - conn->eport =3D srcport; > > - > > conn->seq_init_from_tap =3D ntohl(th->seq); > > conn->seq_from_tap =3D conn->seq_init_from_tap + 1; > > conn->seq_ack_to_tap =3D conn->seq_from_tap; > > @@ -2254,10 +2260,6 @@ static void tcp_tap_conn_from_sock(struct ctx *c= , in_port_t dstport, > > conn->ws_to_tap =3D conn->ws_from_tap =3D 0; > > conn_event(c, conn, SOCK_ACCEPTED); > > =20 > > - conn->faddr =3D saddr; > > - conn->fport =3D srcport; > > - conn->eport =3D dstport; > > - > > tcp_seq_init(c, conn, now); > > tcp_hash_insert(c, conn); > > =20 > > diff --git a/tcp_conn.h b/tcp_conn.h > > index 5f8c8fb6..b741ce32 100644 > > --- a/tcp_conn.h > > +++ b/tcp_conn.h > > @@ -13,19 +13,16 @@ > > * struct tcp_tap_conn - Descriptor for a TCP connection (not spliced) > > * @f: Generic flow information > > * @in_epoll: Is the connection in the epoll set? > > + * @retrans: Number of retransmissions occurred due to ACK_TIMEOUT > > + * @ws_from_tap: Window scaling factor advertised from tap/guest > > + * @ws_to_tap: Window scaling factor advertised to tap/guest > > * @tap_mss: MSS advertised by tap/guest, rounded to 2 ^ TCP_MSS_BITS > > * @sock: Socket descriptor number > > * @events: Connection events, implying connection states > > * @timer: timerfd descriptor for timeout events > > * @flags: Connection flags representing internal attributes > > - * @retrans: Number of retransmissions occurred due to ACK_TIMEOUT > > - * @ws_from_tap: Window scaling factor advertised from tap/guest > > - * @ws_to_tap: Window scaling factor advertised to tap/guest > > * @sndbuf: Sending buffer in kernel, rounded to 2 ^ SNDBUF_BITS > > * @seq_dup_ack_approx: Last duplicate ACK number sent to tap > > - * @faddr: Guest side forwarding address (guest's remote address) > > - * @eport: Guest side endpoint port (guest's local port) > > - * @fport: Guest side forwarding port (guest's remote port) > > * @wnd_from_tap: Last window size from tap, unscaled (as received) > > * @wnd_to_tap: Sending window advertised to tap, unscaled (as sent) > > * @seq_to_tap: Next sequence for packets to tap > > @@ -49,6 +46,10 @@ struct tcp_tap_conn { > > unsigned int ws_from_tap :TCP_WS_BITS; > > unsigned int ws_to_tap :TCP_WS_BITS; > > =20 > > +#define TCP_MSS_BITS 14 > > + unsigned int tap_mss :TCP_MSS_BITS; > > +#define MSS_SET(conn, mss) (conn->tap_mss =3D (mss >> (16 - TCP_MSS_BI= TS))) > > +#define MSS_GET(conn) (conn->tap_mss << (16 - TCP_MSS_BITS)) > > =20 > > int sock :FD_REF_BITS; > > =20 > > @@ -77,13 +78,6 @@ struct tcp_tap_conn { > > #define ACK_TO_TAP_DUE BIT(3) > > #define ACK_FROM_TAP_DUE BIT(4) > > =20 > > - > > -#define TCP_MSS_BITS 14 > > - unsigned int tap_mss :TCP_MSS_BITS; > > -#define MSS_SET(conn, mss) (conn->tap_mss =3D (mss >> (16 - TCP_MSS_BI= TS))) > > -#define MSS_GET(conn) (conn->tap_mss << (16 - TCP_MSS_BITS)) > > - > > - > > #define SNDBUF_BITS 24 > > unsigned int sndbuf :SNDBUF_BITS; > > #define SNDBUF_SET(conn, bytes) (conn->sndbuf =3D ((bytes) >> (32 - SN= DBUF_BITS))) > > @@ -91,11 +85,6 @@ struct tcp_tap_conn { > > =20 > > uint8_t seq_dup_ack_approx; > > =20 > > - > > - union inany_addr faddr; > > - in_port_t eport; > > - in_port_t fport; > > - > > uint16_t wnd_from_tap; > > uint16_t wnd_to_tap; > > =20 > > @@ -109,22 +98,24 @@ struct tcp_tap_conn { > > /** > > * struct tcp_splice_conn - Descriptor for a spliced TCP connection > > * @f: Generic flow information > > - * @in_epoll: Is the connection in the epoll set? > > * @s: File descriptor for sockets > > * @pipe: File descriptors for pipes > > - * @events: Events observed/actions performed on connection > > - * @flags: Connection flags (attributes, not events) > > * @read: Bytes read (not fully written to other side in one shot) > > * @written: Bytes written (not fully written from one other side rea= d) > > -*/ > > + * @events: Events observed/actions performed on connection > > + * @flags: Connection flags (attributes, not events) > > + * @in_epoll: Is the connection in the epoll set? > > + */ > > struct tcp_splice_conn { > > /* Must be first element */ > > struct flow_common f; > > =20 > > - bool in_epoll :1; > > int s[SIDES]; > > int pipe[SIDES][2]; > > =20 > > + uint32_t read[SIDES]; > > + uint32_t written[SIDES]; > > + > > uint8_t events; > > #define SPLICE_CLOSED 0 > > #define SPLICE_CONNECT BIT(0) > > @@ -144,8 +135,7 @@ struct tcp_splice_conn { > > #define RCVLOWAT_ACT_1 BIT(4) > > #define CLOSING BIT(5) > > =20 > > - uint32_t read[SIDES]; > > - uint32_t written[SIDES]; > > + bool in_epoll :1; >=20 > Excess tab. Oops, fixed. >=20 > > }; > > =20 > > /* Socket pools */ > > diff --git a/tcp_internal.h b/tcp_internal.h > > index 51aaa169..4f61e5c3 100644 > > --- a/tcp_internal.h > > +++ b/tcp_internal.h > > @@ -39,7 +39,11 @@ > > #define OPT_SACKP 4 > > #define OPT_SACK 5 > > #define OPT_TS 8 > > -#define CONN_V4(conn) (!!inany_v4(&(conn)->faddr)) > > + > > +#define TAPSIDE(conn_) ((conn_)->f.pif[1] =3D=3D PIF_TAP) > > +#define TAPFLOW(conn_) (&((conn_)->f.side[TAPSIDE(conn_)])) > > + > > +#define CONN_V4(conn) (!!inany_v4(&TAPFLOW(conn)->faddr)) > > #define CONN_V6(conn) (!CONN_V4(conn)) > > =20 > > /* >=20 > I reviewed up to 7/26 by the way, no further comments until that point. >=20 --=20 David Gibson (he or they) | I'll have my music baroque, and my code david AT gibson.dropbear.id.au | minimalist, thank you, not the other way | around. http://www.ozlabs.org/~dgibson --dj9Fdx3wesXZ/Ca2 Content-Type: application/pgp-signature; name="signature.asc" -----BEGIN PGP SIGNATURE----- iQIzBAEBCAAdFiEEO+dNsU4E3yXUXRK2zQJF27ox2GcFAmZ7X5QACgkQzQJF27ox 2GdlWw//WM6T9R5tdA747X/Em4Ej8jhdf65054b217QYvEkr0yoUNwGl/tv8povn MlyxsD67TutdS7Xudux3g5tv1cVerZgBe2vObm58DxVzIbVa4cgJDqvnpRpoS+kj Qv/f0RYckXWPzI+Fh7fO5FUOth0QrGlzT9ldwB2nf4zpz4gml4vJw0ztNy95Rkm9 xXnWlMnRrFRGDV/bgzRIt4zcKwG+yKCsWWT7UGj5drvSyiWp4LQBeb/LM6P5GPlJ vuDWqlbmyeuNgzzfPxRRIeu7sySr2Mz3yBGr4UgQdhta0xqWcPLXM/NQ29me0xpm V2rh4sGAYGIXb5evo079sZo9sE+vRWwnVJrG/XXcexY0P7cAjQJmwE2Rw6G8sqU6 kXgRu3ZOeqFRL1frcpaR+Kq6E+n9AvfVqdliO7TDxKZiL6h/VktZaElPcnnLFcvx 3XvIi5mPVgKDPYSyb5S65/EK+QozOz5Z9x6SOt1V0RVzy9KnIkkDNCVh9Nkb7G20 GNoGET+9w3Tw427mRjX0rPgQocnI4WgFJ2+1uCKs6xm70OMPBumlDH3bppJ1LTlo sq5BIMKNt90sk/luBhsASBfYJkuXZ6z0wtYN3iu/6NzNYj+Alg6dxVafyr4pMTpZ 1hbyqPPO1yvTifpHfbjv65if/7F6iP1itP4To1mraiHicbNGLts= =xJ5B -----END PGP SIGNATURE----- --dj9Fdx3wesXZ/Ca2--