From mboxrd@z Thu Jan 1 00:00:00 1970 Received: from us-smtp-delivery-124.mimecast.com (us-smtp-delivery-124.mimecast.com [170.10.129.124]) by passt.top (Postfix) with ESMTP id 0B6455A0271 for ; Fri, 2 Feb 2024 15:11:55 +0100 (CET) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=redhat.com; s=mimecast20190719; t=1706883115; h=from:from:reply-to:subject:subject:date:date:message-id:message-id: to:to:cc:cc:mime-version:mime-version:content-type:content-type: content-transfer-encoding:content-transfer-encoding: in-reply-to:in-reply-to:references:references; bh=dX2MWuca+v4wEKZg/WFTQadAdheRpgXRvWZXER/LPME=; b=cRj1BxlLRoRIALiqxhHWAwQuguWqHREnBJBhlSEV927ESEWJy3Z0IYUx8u9UZhOyR+nWx+ kEIqWMNoOVkHaxBRnjX7Wu40an9M7nsLtQsctC/HL6w29SFdXiWx5LqFPVqA7iy/1ZzO8Y 1YohGMmG+zbUkkDKLDKcvHU0Id1K6Lk= Received: from mimecast-mx02.redhat.com (mimecast-mx02.redhat.com [66.187.233.88]) by relay.mimecast.com with ESMTP with STARTTLS (version=TLSv1.3, cipher=TLS_AES_256_GCM_SHA384) id us-mta-17-LpKbS8tHPuCEEgQBNMLh6Q-1; Fri, 02 Feb 2024 09:11:53 -0500 X-MC-Unique: LpKbS8tHPuCEEgQBNMLh6Q-1 Received: from smtp.corp.redhat.com (int-mx02.intmail.prod.int.rdu2.redhat.com [10.11.54.2]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature RSA-PSS (2048 bits) server-digest SHA256) (No client certificate requested) by mimecast-mx02.redhat.com (Postfix) with ESMTPS id 993B9185A788 for ; Fri, 2 Feb 2024 14:11:53 +0000 (UTC) Received: from virtlab218.virt.lab.eng.bos.redhat.com (virtlab218.virt.lab.eng.bos.redhat.com [10.19.152.190]) by smtp.corp.redhat.com (Postfix) with ESMTP id 814EF400D784; Fri, 2 Feb 2024 14:11:53 +0000 (UTC) From: Laurent Vivier To: passt-dev@passt.top Subject: [PATCH 08/24] tcp: extract buffer management from tcp_send_flag() Date: Fri, 2 Feb 2024 15:11:35 +0100 Message-ID: <20240202141151.3762941-9-lvivier@redhat.com> In-Reply-To: <20240202141151.3762941-1-lvivier@redhat.com> References: 
<20240202141151.3762941-1-lvivier@redhat.com> MIME-Version: 1.0 X-Scanned-By: MIMEDefang 3.4.1 on 10.11.54.2 X-Mimecast-Spam-Score: 0 X-Mimecast-Originator: redhat.com Content-Transfer-Encoding: 8bit Content-Type: text/plain; charset="US-ASCII"; x-default=true Message-ID-Hash: KFZCSXVOZ2Z6AQOZVFQQRRRQDWKHOPNC X-Message-ID-Hash: KFZCSXVOZ2Z6AQOZVFQQRRRQDWKHOPNC X-MailFrom: lvivier@redhat.com X-Mailman-Rule-Misses: dmarc-mitigation; no-senders; approved; emergency; loop; banned-address; member-moderation; nonmember-moderation; administrivia; implicit-dest; max-recipients; max-size; news-moderation; no-subject; digests; suspicious-header CC: Laurent Vivier X-Mailman-Version: 3.3.8 Precedence: list List-Id: Development discussion and patches for passt Archived-At: Archived-At: List-Archive: List-Archive: List-Help: List-Owner: List-Post: List-Subscribe: List-Unsubscribe: Signed-off-by: Laurent Vivier --- tcp.c | 224 +++++++++++++++++++++++++++++++++------------------------- 1 file changed, 129 insertions(+), 95 deletions(-) diff --git a/tcp.c b/tcp.c index 2fd6bc2eda53..20ad8a4e5271 100644 --- a/tcp.c +++ b/tcp.c @@ -1320,87 +1320,98 @@ void tcp_defer_handler(struct ctx *c) tcp_l2_data_buf_flush(c); } +static void tcp_set_tcp_header(struct tcphdr *th, + const struct tcp_tap_conn *conn, uint32_t seq) +{ + th->source = htons(conn->fport); + th->dest = htons(conn->eport); + th->seq = htonl(seq); + th->ack_seq = htonl(conn->seq_ack_to_tap); + if (conn->events & ESTABLISHED) { + th->window = htons(conn->wnd_to_tap); + } else { + unsigned wnd = conn->wnd_to_tap << conn->ws_to_tap; + + th->window = htons(MIN(wnd, USHRT_MAX)); + } +} + /** - * tcp_l2_buf_fill_headers() - Fill 802.3, IP, TCP headers in pre-cooked buffers + * ipv4_fill_headers() - Fill 802.3, IPv4, TCP headers in pre-cooked buffers * @c: Execution context * @conn: Connection pointer - * @p: Pointer to any type of TCP pre-cooked buffer + * @iph: Pointer to IPv4 header, immediately followed by a TCP header * 
@plen: Payload length (including TCP header options) * @check: Checksum, if already known * @seq: Sequence number for this segment * - * Return: frame length including L2 headers, host order + * Return: IPv4 packet length (IPv4 header, TCP header and payload), excluding L2 headers, host order */ -static size_t tcp_l2_buf_fill_headers(const struct ctx *c, - const struct tcp_tap_conn *conn, - void *p, size_t plen, - const uint16_t *check, uint32_t seq) + +static size_t ipv4_fill_headers(const struct ctx *c, + const struct tcp_tap_conn *conn, + struct iphdr *iph, size_t plen, + const uint16_t *check, uint32_t seq) { + struct tcphdr *th = (void *)(iph + 1); const struct in_addr *a4 = inany_v4(&conn->faddr); - size_t ip_len, tlen; - -#define SET_TCP_HEADER_COMMON_V4_V6(b, conn, seq) \ -do { \ - b->th.source = htons(conn->fport); \ - b->th.dest = htons(conn->eport); \ - b->th.seq = htonl(seq); \ - b->th.ack_seq = htonl(conn->seq_ack_to_tap); \ - if (conn->events & ESTABLISHED) { \ - b->th.window = htons(conn->wnd_to_tap); \ - } else { \ - unsigned wnd = conn->wnd_to_tap << conn->ws_to_tap; \ - \ - b->th.window = htons(MIN(wnd, USHRT_MAX)); \ - } \ -} while (0) - - if (a4) { - struct tcp4_l2_buf_t *b = (struct tcp4_l2_buf_t *)p; - - ip_len = plen + sizeof(struct iphdr) + sizeof(struct tcphdr); - b->iph.tot_len = htons(ip_len); - b->iph.saddr = a4->s_addr; - b->iph.daddr = c->ip4.addr_seen.s_addr; - - b->iph.check = check ?
*check : - ipv4_hdr_checksum(&b->iph, IPPROTO_TCP); - - SET_TCP_HEADER_COMMON_V4_V6(b, conn, seq); - - b->th.check = tcp_update_check_tcp4(&b->iph); - - tlen = tap_iov_len(c, &b->taph, ip_len); - } else { - struct tcp6_l2_buf_t *b = (struct tcp6_l2_buf_t *)p; + size_t ip_len = plen + sizeof(struct iphdr) + sizeof(struct tcphdr); - ip_len = plen + sizeof(struct ipv6hdr) + sizeof(struct tcphdr); + iph->tot_len = htons(ip_len); + iph->saddr = a4->s_addr; + iph->daddr = c->ip4.addr_seen.s_addr; - b->ip6h.payload_len = htons(plen + sizeof(struct tcphdr)); - b->ip6h.saddr = conn->faddr.a6; - if (IN6_IS_ADDR_LINKLOCAL(&b->ip6h.saddr)) - b->ip6h.daddr = c->ip6.addr_ll_seen; - else - b->ip6h.daddr = c->ip6.addr_seen; + iph->check = check ? *check : ipv4_hdr_checksum(iph, IPPROTO_TCP); + + tcp_set_tcp_header(th, conn, seq); + + th->check = tcp_update_check_tcp4(iph); + + return ip_len; +} + +/** + * ipv6_fill_headers() - Fill 802.3, IPv6, TCP headers in pre-cooked buffers + * @c: Execution context + * @conn: Connection pointer + * @ip6h: Pointer to IPv6 header, immediately followed by a TCP header + * @plen: Payload length (including TCP header options) + * @check: Checksum, if already known + * @seq: Sequence number for this segment + * + * Return: IPv6 packet length (IPv6 header, TCP header and payload), excluding L2 headers, host order + */ + +static size_t ipv6_fill_headers(const struct ctx *c, + const struct tcp_tap_conn *conn, + struct ipv6hdr *ip6h, size_t plen, + uint32_t seq) +{ + struct tcphdr *th = (void *)(ip6h + 1); + size_t ip_len = plen + sizeof(struct ipv6hdr) + sizeof(struct tcphdr); - memset(b->ip6h.flow_lbl, 0, 3); + ip6h->payload_len = htons(plen + sizeof(struct tcphdr)); + ip6h->saddr = conn->faddr.a6; + if (IN6_IS_ADDR_LINKLOCAL(&ip6h->saddr)) + ip6h->daddr = c->ip6.addr_ll_seen; + else + ip6h->daddr = c->ip6.addr_seen; - SET_TCP_HEADER_COMMON_V4_V6(b, conn, seq); + memset(ip6h->flow_lbl, 0, 3); - b->th.check = tcp_update_check_tcp6(&b->ip6h); + tcp_set_tcp_header(th, conn, seq); -
b->ip6h.hop_limit = 255; - b->ip6h.version = 6; - b->ip6h.nexthdr = IPPROTO_TCP; + th->check = tcp_update_check_tcp6(ip6h); - b->ip6h.flow_lbl[0] = (conn->sock >> 16) & 0xf; - b->ip6h.flow_lbl[1] = (conn->sock >> 8) & 0xff; - b->ip6h.flow_lbl[2] = (conn->sock >> 0) & 0xff; + ip6h->hop_limit = 255; + ip6h->version = 6; + ip6h->nexthdr = IPPROTO_TCP; - tlen = tap_iov_len(c, &b->taph, ip_len); - } -#undef SET_TCP_HEADER_COMMON_V4_V6 + ip6h->flow_lbl[0] = (conn->sock >> 16) & 0xf; + ip6h->flow_lbl[1] = (conn->sock >> 8) & 0xff; + ip6h->flow_lbl[2] = (conn->sock >> 0) & 0xff; - return tlen; + return ip_len; } /** @@ -1520,27 +1531,21 @@ static void tcp_update_seqack_from_tap(const struct ctx *c, } /** - * tcp_send_flag() - Send segment with flags to tap (no payload) + * do_tcp_send_flag() - Send segment with flags to tap (no payload) * @c: Execution context * @conn: Connection pointer * @flags: TCP flags: if not set, send segment only if ACK is due * * Return: negative error code on connection reset, 0 otherwise */ -static int tcp_send_flag(struct ctx *c, struct tcp_tap_conn *conn, int flags) + +static int do_tcp_send_flag(struct ctx *c, struct tcp_tap_conn *conn, int flags, struct tcphdr *th, char *data, size_t optlen) { uint32_t prev_ack_to_tap = conn->seq_ack_to_tap; uint32_t prev_wnd_to_tap = conn->wnd_to_tap; - struct tcp4_l2_flags_buf_t *b4 = NULL; - struct tcp6_l2_flags_buf_t *b6 = NULL; struct tcp_info tinfo = { 0 }; socklen_t sl = sizeof(tinfo); int s = conn->sock; - size_t optlen = 0; - struct iovec *iov; - struct tcphdr *th; - char *data; - void *p; if (SEQ_GE(conn->seq_ack_to_tap, conn->seq_from_tap) && !flags && conn->wnd_to_tap) @@ -1562,26 +1567,9 @@ static int tcp_send_flag(struct ctx *c, struct tcp_tap_conn *conn, int flags) if (!tcp_update_seqack_wnd(c, conn, flags, &tinfo) && !flags) return 0; - if (CONN_V4(conn)) { - iov = tcp4_l2_flags_iov + tcp4_l2_flags_buf_used; - p = b4 = tcp4_l2_flags_buf + tcp4_l2_flags_buf_used++; - th = &b4->th; - - /* gcc 
11.2 would complain on data = (char *)(th + 1); */ - data = b4->opts; - } else { - iov = tcp6_l2_flags_iov + tcp6_l2_flags_buf_used; - p = b6 = tcp6_l2_flags_buf + tcp6_l2_flags_buf_used++; - th = &b6->th; - data = b6->opts; - } - if (flags & SYN) { int mss; - /* Options: MSS, NOP and window scale (8 bytes) */ - optlen = OPT_MSS_LEN + 1 + OPT_WS_LEN; - *data++ = OPT_MSS; *data++ = OPT_MSS_LEN; @@ -1624,9 +1612,6 @@ static int tcp_send_flag(struct ctx *c, struct tcp_tap_conn *conn, int flags) th->syn = !!(flags & SYN); th->fin = !!(flags & FIN); - iov->iov_len = tcp_l2_buf_fill_headers(c, conn, p, optlen, - NULL, conn->seq_to_tap); - if (th->ack) { if (SEQ_GE(conn->seq_ack_to_tap, conn->seq_from_tap)) conn_flag(c, conn, ~ACK_TO_TAP_DUE); @@ -1641,8 +1626,38 @@ static int tcp_send_flag(struct ctx *c, struct tcp_tap_conn *conn, int flags) if (th->fin || th->syn) conn->seq_to_tap++; + return 1; +} + +static int tcp_send_flag(struct ctx *c, struct tcp_tap_conn *conn, int flags) +{ + size_t optlen = 0; + struct iovec *iov; + size_t ip_len; + int ret; + + /* Options: MSS, NOP and window scale (8 bytes) */ + if (flags & SYN) + optlen = OPT_MSS_LEN + 1 + OPT_WS_LEN; + if (CONN_V4(conn)) { + struct tcp4_l2_flags_buf_t *b4; + + iov = tcp4_l2_flags_iov + tcp4_l2_flags_buf_used; + b4 = tcp4_l2_flags_buf + tcp4_l2_flags_buf_used++; + + ret = do_tcp_send_flag(c, conn, flags, &b4->th, b4->opts, + optlen); + if (ret <= 0) + return ret; + + ip_len = ipv4_fill_headers(c, conn, &b4->iph, optlen, + NULL, conn->seq_to_tap); + + iov->iov_len = tap_iov_len(c, &b4->taph, ip_len); + if (flags & DUP_ACK) { + memcpy(b4 + 1, b4, sizeof(*b4)); (iov + 1)->iov_len = iov->iov_len; tcp4_l2_flags_buf_used++; @@ -1651,6 +1666,21 @@ static int tcp_send_flag(struct ctx *c, struct tcp_tap_conn *conn, int flags) if (tcp4_l2_flags_buf_used > ARRAY_SIZE(tcp4_l2_flags_buf) - 2) tcp_l2_flags_buf_flush(c); } else { + struct tcp6_l2_flags_buf_t *b6; + + iov = tcp6_l2_flags_iov + tcp6_l2_flags_buf_used; + b6 = 
tcp6_l2_flags_buf + tcp6_l2_flags_buf_used++; + + ret = do_tcp_send_flag(c, conn, flags, &b6->th, b6->opts, + optlen); + if (ret <= 0) + return ret; + + ip_len = ipv6_fill_headers(c, conn, &b6->ip6h, optlen, + conn->seq_to_tap); + + iov->iov_len = tap_iov_len(c, &b6->taph, ip_len); + if (flags & DUP_ACK) { memcpy(b6 + 1, b6, sizeof(*b6)); (iov + 1)->iov_len = iov->iov_len; @@ -2050,6 +2080,7 @@ static void tcp_data_to_tap(const struct ctx *c, struct tcp_tap_conn *conn, { uint32_t *seq_update = &conn->seq_to_tap; struct iovec *iov; + size_t ip_len; if (CONN_V4(conn)) { struct tcp4_l2_buf_t *b = &tcp4_l2_buf[tcp4_l2_buf_used]; @@ -2058,9 +2089,11 @@ static void tcp_data_to_tap(const struct ctx *c, struct tcp_tap_conn *conn, tcp4_l2_buf_seq_update[tcp4_l2_buf_used].seq = seq_update; tcp4_l2_buf_seq_update[tcp4_l2_buf_used].len = plen; + ip_len = ipv4_fill_headers(c, conn, &b->iph, plen, + check, seq); + iov = tcp4_l2_iov + tcp4_l2_buf_used++; - iov->iov_len = tcp_l2_buf_fill_headers(c, conn, b, plen, - check, seq); + iov->iov_len = tap_iov_len(c, &b->taph, ip_len); if (tcp4_l2_buf_used > ARRAY_SIZE(tcp4_l2_buf) - 1) tcp_l2_data_buf_flush(c); } else if (CONN_V6(conn)) { @@ -2069,9 +2102,10 @@ static void tcp_data_to_tap(const struct ctx *c, struct tcp_tap_conn *conn, tcp6_l2_buf_seq_update[tcp6_l2_buf_used].seq = seq_update; tcp6_l2_buf_seq_update[tcp6_l2_buf_used].len = plen; + ip_len = ipv6_fill_headers(c, conn, &b->ip6h, plen, seq); + iov = tcp6_l2_iov + tcp6_l2_buf_used++; - iov->iov_len = tcp_l2_buf_fill_headers(c, conn, b, plen, - NULL, seq); + iov->iov_len = tap_iov_len(c, &b->taph, ip_len); if (tcp6_l2_buf_used > ARRAY_SIZE(tcp6_l2_buf) - 1) tcp_l2_data_buf_flush(c); } -- 2.42.0