From mboxrd@z Thu Jan 1 00:00:00 1970 Received: from us-smtp-delivery-124.mimecast.com (us-smtp-delivery-124.mimecast.com [170.10.133.124]) by passt.top (Postfix) with ESMTP id 822C75A004E for ; Wed, 26 Jun 2024 00:25:45 +0200 (CEST) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=redhat.com; s=mimecast20190719; t=1719354344; h=from:from:reply-to:subject:subject:date:date:message-id:message-id: to:to:cc:cc:mime-version:mime-version:content-type:content-type: content-transfer-encoding:content-transfer-encoding: in-reply-to:in-reply-to:references:references; bh=y9Swq5sqiSzSiWTQUIn9psV0/AwwrdiSakI19YoNtq8=; b=Yf7r7+3o0ybPkFpTwbuWEWqB3QP/kqp4PzKDsasUO6wziqE3aLnNCzpnEYEeWPUOGDdt56 j+oJq9ssafLCcSo1+9fcxMmC0hrxLc27gcg9PKTOJ8RkSVFnJr0YUYuCWBQ3M2jGfyN5GY H+mO7+TyVsMwolagpN0y2W3e3zaUuYU= Received: from mail-oa1-f69.google.com (mail-oa1-f69.google.com [209.85.160.69]) by relay.mimecast.com with ESMTP with STARTTLS (version=TLSv1.3, cipher=TLS_AES_256_GCM_SHA384) id us-mta-374-7HOBy-JoPSiQudsyMiZAnQ-1; Tue, 25 Jun 2024 18:25:42 -0400 X-MC-Unique: 7HOBy-JoPSiQudsyMiZAnQ-1 Received: by mail-oa1-f69.google.com with SMTP id 586e51a60fabf-2546dee7270so6460619fac.2 for ; Tue, 25 Jun 2024 15:25:41 -0700 (PDT) X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20230601; t=1719354341; x=1719959141; h=content-transfer-encoding:mime-version:organization:references :in-reply-to:message-id:subject:cc:to:from:date:x-gm-message-state :from:to:cc:subject:date:message-id:reply-to; bh=y9Swq5sqiSzSiWTQUIn9psV0/AwwrdiSakI19YoNtq8=; b=qftmgyM85wm0zp6bvE/Xx6M46Wu/5610Wu2NuYQh+Ocq1m34DXb/uyWlft0XlmUpmp Bvx35QHLyBSuGno8uFoW4yQIVDMwAEUApZNetnCDvBpO2m4uZc7FT7ZOUXCzXz8PicWd FxiPA6HMXzMvcUQ2IuTq0MEY8gmfAQl59ysAUnTln6h994rfKsuBadGqnoyk5JnceoUx UD3CwzrLDTuxp9tIyQ6YkniBwb+tlEI4xYf0YVvG8o60bQE3vY4WdDzkVWIbtTGdKQDk 5DgrKM3dM7jqB3Tqhcl9GbX3PZCdQViS07npi0PTdAQkAbkDR/UR/BZR+Q5DqV6Je7JV dJOA== X-Gm-Message-State: AOJu0Yy6sdF57XQILZDp+rmEby5agia3RuCsLA6hTR8/on/rrWDTrIyo 
ZCB2K6aDe+k500GK106Tlp/GuHAVxemX2/yZLQGoiqIX563nQbOg2Kz7Yg2jYPpjwbranI398h7 uStUOQSLG/2h76NWbW4ZLtl+2gwApsEejkb3H4c4i8UmUgtlH4w== X-Received: by 2002:a05:6870:a11b:b0:259:88b4:97b with SMTP id 586e51a60fabf-25cfcda3680mr10252202fac.5.1719354341060; Tue, 25 Jun 2024 15:25:41 -0700 (PDT) X-Google-Smtp-Source: AGHT+IGYHk1oM9QLXBGwc0ciw6T1hNhxf6Xx5BLx4OKayR49zR7VPvqX6WYuOBQuYtNyoJPR557mjw== X-Received: by 2002:a05:6870:a11b:b0:259:88b4:97b with SMTP id 586e51a60fabf-25cfcda3680mr10252183fac.5.1719354340455; Tue, 25 Jun 2024 15:25:40 -0700 (PDT) Received: from maya.cloud.tilaa.com (maya.cloud.tilaa.com. [164.138.29.33]) by smtp.gmail.com with ESMTPSA id af79cd13be357-79bce93308bsm445114485a.117.2024.06.25.15.25.39 (version=TLS1_2 cipher=ECDHE-ECDSA-AES128-GCM-SHA256 bits=128/128); Tue, 25 Jun 2024 15:25:39 -0700 (PDT) Date: Wed, 26 Jun 2024 00:25:05 +0200 From: Stefano Brivio To: David Gibson Subject: Re: [PATCH v6 03/26] tcp, flow: Remove redundant information, repack connection structures Message-ID: <20240626002505.12f2b3b2@elisabeth> In-Reply-To: <20240614061348.3814736-4-david@gibson.dropbear.id.au> References: <20240614061348.3814736-1-david@gibson.dropbear.id.au> <20240614061348.3814736-4-david@gibson.dropbear.id.au> Organization: Red Hat X-Mailer: Claws Mail 4.2.0 (GTK 3.24.41; x86_64-pc-linux-gnu) MIME-Version: 1.0 X-Mimecast-Spam-Score: 0 X-Mimecast-Originator: redhat.com Content-Type: text/plain; charset=US-ASCII Content-Transfer-Encoding: 7bit Message-ID-Hash: XTYSSEMLHZUNKUQ5YDSXVJ4U322WKIAR X-Message-ID-Hash: XTYSSEMLHZUNKUQ5YDSXVJ4U322WKIAR X-MailFrom: sbrivio@redhat.com X-Mailman-Rule-Misses: dmarc-mitigation; no-senders; approved; emergency; loop; banned-address; member-moderation; nonmember-moderation; administrivia; implicit-dest; max-recipients; max-size; news-moderation; no-subject; digests; suspicious-header CC: passt-dev@passt.top, jmaloy@redhat.com X-Mailman-Version: 3.3.8 Precedence: list List-Id: Development discussion and patches for passt 
Archived-At: Archived-At: List-Archive: List-Archive: List-Help: List-Owner: List-Post: List-Subscribe: List-Unsubscribe: On Fri, 14 Jun 2024 16:13:25 +1000 David Gibson wrote: > Some information we explicitly store in the TCP connection is now > duplicated in the common flow structure. Access it from there instead, and > remove it from the TCP specific structure. With that done we can reorder > both the "tap" and "splice" TCP structures a bit to get better packing for > the new combined flow table entries. > > Signed-off-by: David Gibson > --- > tcp.c | 52 ++++++++++++++++++++++++++------------------------ > tcp_conn.h | 40 +++++++++++++++----------------------- > tcp_internal.h | 6 +++++- > 3 files changed, 47 insertions(+), 51 deletions(-) > > diff --git a/tcp.c b/tcp.c > index c6cd0c72..30ad3dd4 100644 > --- a/tcp.c > +++ b/tcp.c > @@ -333,8 +333,6 @@ > > #define ACK_IF_NEEDED 0 /* See tcp_send_flag() */ > > -#define TAPSIDE(conn_) ((conn_)->f.pif[1] == PIF_TAP) > - > #define CONN_IS_CLOSING(conn) \ > (((conn)->events & ESTABLISHED) && \ > ((conn)->events & (SOCK_FIN_RCVD | TAP_FIN_RCVD))) > @@ -635,10 +633,11 @@ void conn_event_do(const struct ctx *c, struct tcp_tap_conn *conn, > */ > static int tcp_rtt_dst_low(const struct tcp_tap_conn *conn) > { > + const struct flowside *tapside = TAPFLOW(conn); > int i; > > for (i = 0; i < LOW_RTT_TABLE_SIZE; i++) > - if (inany_equals(&conn->faddr, low_rtt_dst + i)) > + if (inany_equals(&tapside->faddr, low_rtt_dst + i)) > return 1; > > return 0; > @@ -653,6 +652,7 @@ static void tcp_rtt_dst_check(const struct tcp_tap_conn *conn, > const struct tcp_info *tinfo) > { > #ifdef HAS_MIN_RTT > + const struct flowside *tapside = TAPFLOW(conn); > int i, hole = -1; > > if (!tinfo->tcpi_min_rtt || > @@ -660,7 +660,7 @@ static void tcp_rtt_dst_check(const struct tcp_tap_conn *conn, > return; > > for (i = 0; i < LOW_RTT_TABLE_SIZE; i++) { > - if (inany_equals(&conn->faddr, low_rtt_dst + i)) > + if (inany_equals(&tapside->faddr, 
low_rtt_dst + i)) > return; > if (hole == -1 && IN6_IS_ADDR_UNSPECIFIED(low_rtt_dst + i)) > hole = i; > @@ -672,7 +672,7 @@ static void tcp_rtt_dst_check(const struct tcp_tap_conn *conn, > if (hole == -1) > return; > > - low_rtt_dst[hole++] = conn->faddr; > + low_rtt_dst[hole++] = tapside->faddr; > if (hole == LOW_RTT_TABLE_SIZE) > hole = 0; > inany_from_af(low_rtt_dst + hole, AF_INET6, &in6addr_any); > @@ -827,8 +827,10 @@ static int tcp_hash_match(const struct tcp_tap_conn *conn, > const union inany_addr *faddr, > in_port_t eport, in_port_t fport) > { > - if (inany_equals(&conn->faddr, faddr) && > - conn->eport == eport && conn->fport == fport) > + const struct flowside *tapside = TAPFLOW(conn); > + > + if (inany_equals(&tapside->faddr, faddr) && > + tapside->eport == eport && tapside->fport == fport) > return 1; > > return 0; > @@ -862,7 +864,10 @@ static uint64_t tcp_hash(const struct ctx *c, const union inany_addr *faddr, > static uint64_t tcp_conn_hash(const struct ctx *c, > const struct tcp_tap_conn *conn) > { > - return tcp_hash(c, &conn->faddr, conn->eport, conn->fport); > + const struct flowside *tapside = TAPFLOW(conn); > + > + return tcp_hash(c, &tapside->faddr, tapside->eport, > + tapside->fport); > } > > /** > @@ -998,10 +1003,12 @@ void tcp_defer_handler(struct ctx *c) > * @seq: Sequence number > */ > static void tcp_fill_header(struct tcphdr *th, > - const struct tcp_tap_conn *conn, uint32_t seq) > + const struct tcp_tap_conn *conn, uint32_t seq) > { > - th->source = htons(conn->fport); > - th->dest = htons(conn->eport); > + const struct flowside *tapside = TAPFLOW(conn); > + > + th->source = htons(tapside->fport); > + th->dest = htons(tapside->eport); > th->seq = htonl(seq); > th->ack_seq = htonl(conn->seq_ack_to_tap); > if (conn->events & ESTABLISHED) { > @@ -1033,7 +1040,8 @@ static size_t tcp_fill_headers4(const struct ctx *c, > size_t dlen, const uint16_t *check, > uint32_t seq) > { > - const struct in_addr *a4 = inany_v4(&conn->faddr); > + 
const struct flowside *tapside = TAPFLOW(conn); > + const struct in_addr *a4 = inany_v4(&tapside->faddr); > size_t l4len = dlen + sizeof(*th); > size_t l3len = l4len + sizeof(*iph); > > @@ -1075,10 +1083,11 @@ static size_t tcp_fill_headers6(const struct ctx *c, > struct ipv6hdr *ip6h, struct tcphdr *th, > size_t dlen, uint32_t seq) > { > + const struct flowside *tapside = TAPFLOW(conn); > size_t l4len = dlen + sizeof(*th); > > ip6h->payload_len = htons(l4len); > - ip6h->saddr = conn->faddr.a6; > + ip6h->saddr = tapside->faddr.a6; > if (IN6_IS_ADDR_LINKLOCAL(&ip6h->saddr)) > ip6h->daddr = c->ip6.addr_ll_seen; > else > @@ -1117,7 +1126,8 @@ size_t tcp_l2_buf_fill_headers(const struct ctx *c, > struct iovec *iov, size_t dlen, > const uint16_t *check, uint32_t seq) > { > - const struct in_addr *a4 = inany_v4(&conn->faddr); > + const struct flowside *tapside = TAPFLOW(conn); > + const struct in_addr *a4 = inany_v4(&tapside->faddr); > > if (a4) { > return tcp_fill_headers4(c, conn, iov[TCP_IOV_TAP].iov_base, > @@ -1420,6 +1430,7 @@ static void tcp_seq_init(const struct ctx *c, struct tcp_tap_conn *conn, > const struct timespec *now) > { > struct siphash_state state = SIPHASH_INIT(c->hash_secret); > + const struct flowside *tapside = TAPFLOW(conn); > union inany_addr aany; > uint64_t hash; > uint32_t ns; > @@ -1429,10 +1440,10 @@ static void tcp_seq_init(const struct ctx *c, struct tcp_tap_conn *conn, > else > inany_from_af(&aany, AF_INET6, &c->ip6.addr); > > - inany_siphash_feed(&state, &conn->faddr); > + inany_siphash_feed(&state, &tapside->faddr); > inany_siphash_feed(&state, &aany); > hash = siphash_final(&state, 36, > - (uint64_t)conn->fport << 16 | conn->eport); > + (uint64_t)tapside->fport << 16 | tapside->eport); > > /* 32ns ticks, overflows 32 bits every 137s */ > ns = (now->tv_sec * 1000000000 + now->tv_nsec) >> 5; > @@ -1707,11 +1718,6 @@ static void tcp_conn_from_tap(struct ctx *c, sa_family_t af, > if (!(conn->wnd_from_tap = (htons(th->window) >> 
conn->ws_from_tap))) > conn->wnd_from_tap = 1; > > - inany_from_af(&conn->faddr, af, daddr); > - > - conn->fport = dstport; > - conn->eport = srcport; > - > conn->seq_init_from_tap = ntohl(th->seq); > conn->seq_from_tap = conn->seq_init_from_tap + 1; > conn->seq_ack_to_tap = conn->seq_from_tap; > @@ -2254,10 +2260,6 @@ static void tcp_tap_conn_from_sock(struct ctx *c, in_port_t dstport, > conn->ws_to_tap = conn->ws_from_tap = 0; > conn_event(c, conn, SOCK_ACCEPTED); > > - conn->faddr = saddr; > - conn->fport = srcport; > - conn->eport = dstport; > - > tcp_seq_init(c, conn, now); > tcp_hash_insert(c, conn); > > diff --git a/tcp_conn.h b/tcp_conn.h > index 5f8c8fb6..b741ce32 100644 > --- a/tcp_conn.h > +++ b/tcp_conn.h > @@ -13,19 +13,16 @@ > * struct tcp_tap_conn - Descriptor for a TCP connection (not spliced) > * @f: Generic flow information > * @in_epoll: Is the connection in the epoll set? > + * @retrans: Number of retransmissions occurred due to ACK_TIMEOUT > + * @ws_from_tap: Window scaling factor advertised from tap/guest > + * @ws_to_tap: Window scaling factor advertised to tap/guest > * @tap_mss: MSS advertised by tap/guest, rounded to 2 ^ TCP_MSS_BITS > * @sock: Socket descriptor number > * @events: Connection events, implying connection states > * @timer: timerfd descriptor for timeout events > * @flags: Connection flags representing internal attributes > - * @retrans: Number of retransmissions occurred due to ACK_TIMEOUT > - * @ws_from_tap: Window scaling factor advertised from tap/guest > - * @ws_to_tap: Window scaling factor advertised to tap/guest > * @sndbuf: Sending buffer in kernel, rounded to 2 ^ SNDBUF_BITS > * @seq_dup_ack_approx: Last duplicate ACK number sent to tap > - * @faddr: Guest side forwarding address (guest's remote address) > - * @eport: Guest side endpoint port (guest's local port) > - * @fport: Guest side forwarding port (guest's remote port) > * @wnd_from_tap: Last window size from tap, unscaled (as received) > * @wnd_to_tap: 
Sending window advertised to tap, unscaled (as sent) > * @seq_to_tap: Next sequence for packets to tap > @@ -49,6 +46,10 @@ struct tcp_tap_conn { > unsigned int ws_from_tap :TCP_WS_BITS; > unsigned int ws_to_tap :TCP_WS_BITS; > > +#define TCP_MSS_BITS 14 > + unsigned int tap_mss :TCP_MSS_BITS; > +#define MSS_SET(conn, mss) (conn->tap_mss = (mss >> (16 - TCP_MSS_BITS))) > +#define MSS_GET(conn) (conn->tap_mss << (16 - TCP_MSS_BITS)) > > int sock :FD_REF_BITS; > > @@ -77,13 +78,6 @@ struct tcp_tap_conn { > #define ACK_TO_TAP_DUE BIT(3) > #define ACK_FROM_TAP_DUE BIT(4) > > - > -#define TCP_MSS_BITS 14 > - unsigned int tap_mss :TCP_MSS_BITS; > -#define MSS_SET(conn, mss) (conn->tap_mss = (mss >> (16 - TCP_MSS_BITS))) > -#define MSS_GET(conn) (conn->tap_mss << (16 - TCP_MSS_BITS)) > - > - > #define SNDBUF_BITS 24 > unsigned int sndbuf :SNDBUF_BITS; > #define SNDBUF_SET(conn, bytes) (conn->sndbuf = ((bytes) >> (32 - SNDBUF_BITS))) > @@ -91,11 +85,6 @@ struct tcp_tap_conn { > > uint8_t seq_dup_ack_approx; > > - > - union inany_addr faddr; > - in_port_t eport; > - in_port_t fport; > - > uint16_t wnd_from_tap; > uint16_t wnd_to_tap; > > @@ -109,22 +98,24 @@ struct tcp_tap_conn { > /** > * struct tcp_splice_conn - Descriptor for a spliced TCP connection > * @f: Generic flow information > - * @in_epoll: Is the connection in the epoll set? > * @s: File descriptor for sockets > * @pipe: File descriptors for pipes > - * @events: Events observed/actions performed on connection > - * @flags: Connection flags (attributes, not events) > * @read: Bytes read (not fully written to other side in one shot) > * @written: Bytes written (not fully written from one other side read) > -*/ > + * @events: Events observed/actions performed on connection > + * @flags: Connection flags (attributes, not events) > + * @in_epoll: Is the connection in the epoll set? 
> + */ > struct tcp_splice_conn { > /* Must be first element */ > struct flow_common f; > > - bool in_epoll :1; > int s[SIDES]; > int pipe[SIDES][2]; > > + uint32_t read[SIDES]; > + uint32_t written[SIDES]; > + > uint8_t events; > #define SPLICE_CLOSED 0 > #define SPLICE_CONNECT BIT(0) > @@ -144,8 +135,7 @@ struct tcp_splice_conn { > #define RCVLOWAT_ACT_1 BIT(4) > #define CLOSING BIT(5) > > - uint32_t read[SIDES]; > - uint32_t written[SIDES]; > + bool in_epoll :1; Excess tab. > }; > > /* Socket pools */ > diff --git a/tcp_internal.h b/tcp_internal.h > index 51aaa169..4f61e5c3 100644 > --- a/tcp_internal.h > +++ b/tcp_internal.h > @@ -39,7 +39,11 @@ > #define OPT_SACKP 4 > #define OPT_SACK 5 > #define OPT_TS 8 > -#define CONN_V4(conn) (!!inany_v4(&(conn)->faddr)) > + > +#define TAPSIDE(conn_) ((conn_)->f.pif[1] == PIF_TAP) > +#define TAPFLOW(conn_) (&((conn_)->f.side[TAPSIDE(conn_)])) > + > +#define CONN_V4(conn) (!!inany_v4(&TAPFLOW(conn)->faddr)) > #define CONN_V6(conn) (!CONN_V4(conn)) > > /* By the way, I reviewed up to 7/26 and have no further comments up to that point. -- Stefano