From: Stefano Brivio <sbrivio@redhat.com>
To: David Gibson <david@gibson.dropbear.id.au>
Cc: passt-dev@passt.top
Subject: Re: [PATCH v5 02/19] flow: Make side 0 always be the initiating side
Date: Thu, 16 May 2024 14:06:01 +0200 [thread overview]
Message-ID: <20240516140601.3e2e697c@elisabeth> (raw)
In-Reply-To: <20240514010337.1104606-3-david@gibson.dropbear.id.au>
On Tue, 14 May 2024 11:03:20 +1000
David Gibson <david@gibson.dropbear.id.au> wrote:
> Each flow in the flow table has two sides, 0 and 1, representing the
> two interfaces between which passt/pasta will forward data for that flow.
> Which side is which is currently up to the protocol specific code: TCP
> uses side 0 for the host/"sock" side and 1 for the guest/"tap" side, except
> for spliced connections where it uses 0 for the initiating side and 1 for
> the accepting side. ICMP also uses 0 for the host/"sock" side and 1 for
> the guest/"tap" side, but in its case the latter is always also the
> initiating side.
>
> Make this generically consistent by always using side 0 for the initiating
> side and 1 for the accepting side. This doesn't simplify a lot for now,
> and arguably makes TCP slightly more complex, since we add an extra field
> to the connection structure to record which is the guest facing side.
> This is an interim change, which we'll be able to remove later.
>
> Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
> ---
> flow.c | 5 +----
> flow.h | 5 +++++
> flow_table.h | 6 ++----
> icmp.c | 8 ++------
> tcp.c | 19 ++++++++-----------
> tcp_conn.h | 3 ++-
> tcp_splice.c | 2 +-
> 7 files changed, 21 insertions(+), 27 deletions(-)
>
> diff --git a/flow.c b/flow.c
> index 768e0f6..7456021 100644
> --- a/flow.c
> +++ b/flow.c
> @@ -152,12 +152,10 @@ static void flow_set_state(struct flow_common *f, enum flow_state state)
> * flow_set_type() - Set type and mvoe to TYPED state
> * @flow: Flow to change state
> * @type: Type for new flow
> - * @iniside: Which side initiated the new flow
> *
> * Return: @flow
> */
> -union flow *flow_set_type(union flow *flow, enum flow_type type,
> - unsigned iniside)
> +union flow *flow_set_type(union flow *flow, enum flow_type type)
> {
> struct flow_common *f = &flow->f;
>
> @@ -165,7 +163,6 @@ union flow *flow_set_type(union flow *flow, enum flow_type type,
> ASSERT(flow_new_entry == flow && f->state == FLOW_STATE_NEW);
> ASSERT(f->type == FLOW_TYPE_NONE);
>
> - (void)iniside;
> f->type = type;
> flow_set_state(f, FLOW_STATE_TYPED);
> return flow;
> diff --git a/flow.h b/flow.h
> index 073a734..28169a8 100644
> --- a/flow.h
> +++ b/flow.h
> @@ -95,6 +95,11 @@ extern const uint8_t flow_proto[];
> #define FLOW_PROTO(f) \
> ((f)->type < FLOW_NUM_TYPES ? flow_proto[(f)->type] : 0)
>
> +#define SIDES 2
> +
> +#define INISIDE 0 /* Initiating side */
> +#define FWDSIDE 1 /* Forwarded side */
> +
> /**
> * struct flow_common - Common fields for packet flows
> * @state: State of the flow table entry
> diff --git a/flow_table.h b/flow_table.h
> index 58014d8..7c98195 100644
> --- a/flow_table.h
> +++ b/flow_table.h
> @@ -107,10 +107,8 @@ static inline flow_sidx_t flow_sidx(const struct flow_common *f,
> union flow *flow_alloc(void);
> void flow_alloc_cancel(union flow *flow);
>
> -union flow *flow_set_type(union flow *flow, enum flow_type type,
> - unsigned iniside);
> -#define FLOW_SET_TYPE(flow_, t_, var_, i_) \
> - (&flow_set_type((flow_), (t_), (i_))->var_)
> +union flow *flow_set_type(union flow *flow, enum flow_type type);
> +#define FLOW_SET_TYPE(flow_, t_, var_) (&flow_set_type((flow_), (t_))->var_)
>
> void flow_activate(struct flow_common *f);
> #define FLOW_ACTIVATE(flow_) \
> diff --git a/icmp.c b/icmp.c
> index fda868d..6df0989 100644
> --- a/icmp.c
> +++ b/icmp.c
> @@ -45,10 +45,6 @@
> #define ICMP_ECHO_TIMEOUT 60 /* s, timeout for ICMP socket activity */
> #define ICMP_NUM_IDS (1U << 16)
>
> -/* Sides of a flow as we use them for ping streams */
> -#define SOCKSIDE 0
> -#define TAPSIDE 1
> -
> #define PINGF(idx) (&(FLOW(idx)->ping))
>
> /* Indexed by ICMP echo identifier */
> @@ -167,7 +163,7 @@ static struct icmp_ping_flow *icmp_ping_new(const struct ctx *c,
> if (!flow)
> return NULL;
>
> - pingf = FLOW_SET_TYPE(flow, flowtype, ping, TAPSIDE);
> + pingf = FLOW_SET_TYPE(flow, flowtype, ping);
>
> pingf->seq = -1;
> pingf->id = id;
> @@ -180,7 +176,7 @@ static struct icmp_ping_flow *icmp_ping_new(const struct ctx *c,
> bind_if = c->ip6.ifname_out;
> }
>
> - ref.flowside = FLOW_SIDX(flow, SOCKSIDE);
> + ref.flowside = FLOW_SIDX(flow, FWDSIDE);
> pingf->sock = sock_l4(c, af, flow_proto[flowtype], bind_addr, bind_if,
> 0, ref.data);
>
> diff --git a/tcp.c b/tcp.c
> index 65208ca..06401ba 100644
> --- a/tcp.c
> +++ b/tcp.c
> @@ -303,10 +303,6 @@
>
> #include "flow_table.h"
>
> -/* Sides of a flow as we use them in "tap" connections */
> -#define SOCKSIDE 0
> -#define TAPSIDE 1
> -
> #define TCP_FRAMES_MEM 128
> #define TCP_FRAMES \
> (c->mode == MODE_PASST ? TCP_FRAMES_MEM : 1)
> @@ -581,7 +577,7 @@ static int tcp_epoll_ctl(const struct ctx *c, struct tcp_tap_conn *conn)
> {
> int m = conn->in_epoll ? EPOLL_CTL_MOD : EPOLL_CTL_ADD;
> union epoll_ref ref = { .type = EPOLL_TYPE_TCP, .fd = conn->sock,
> - .flowside = FLOW_SIDX(conn, SOCKSIDE) };
> + .flowside = FLOW_SIDX(conn, !conn->tapside), };
> struct epoll_event ev = { .data.u64 = ref.u64 };
>
> if (conn->events == CLOSED) {
> @@ -1134,7 +1130,7 @@ static uint64_t tcp_conn_hash(const struct ctx *c,
> static inline unsigned tcp_hash_probe(const struct ctx *c,
> const struct tcp_tap_conn *conn)
> {
> - flow_sidx_t sidx = FLOW_SIDX(conn, TAPSIDE);
> + flow_sidx_t sidx = FLOW_SIDX(conn, conn->tapside);
> unsigned b = tcp_conn_hash(c, conn) % TCP_HASH_TABLE_SIZE;
>
> /* Linear probing */
> @@ -1154,7 +1150,7 @@ static void tcp_hash_insert(const struct ctx *c, struct tcp_tap_conn *conn)
> {
> unsigned b = tcp_hash_probe(c, conn);
>
> - tc_hash[b] = FLOW_SIDX(conn, TAPSIDE);
> + tc_hash[b] = FLOW_SIDX(conn, conn->tapside);
> flow_dbg(conn, "hash table insert: sock %i, bucket: %u", conn->sock, b);
> }
>
> @@ -2006,7 +2002,8 @@ static void tcp_conn_from_tap(struct ctx *c, sa_family_t af,
> goto cancel;
> }
>
> - conn = FLOW_SET_TYPE(flow, FLOW_TCP, tcp, TAPSIDE);
> + conn = FLOW_SET_TYPE(flow, FLOW_TCP, tcp);
> + conn->tapside = INISIDE;
> conn->sock = s;
> conn->timer = -1;
> conn_event(c, conn, TAP_SYN_RCVD);
> @@ -2725,9 +2722,9 @@ static void tcp_tap_conn_from_sock(struct ctx *c, in_port_t dstport,
> const union sockaddr_inany *sa,
> const struct timespec *now)
> {
> - struct tcp_tap_conn *conn = FLOW_SET_TYPE(flow, FLOW_TCP, tcp,
> - SOCKSIDE);
> + struct tcp_tap_conn *conn = FLOW_SET_TYPE(flow, FLOW_TCP, tcp);
>
> + conn->tapside = FWDSIDE;
> conn->sock = s;
> conn->timer = -1;
> conn->ws_to_tap = conn->ws_from_tap = 0;
> @@ -2884,7 +2881,7 @@ void tcp_sock_handler(struct ctx *c, union epoll_ref ref, uint32_t events)
> struct tcp_tap_conn *conn = CONN(ref.flowside.flow);
>
> ASSERT(conn->f.type == FLOW_TCP);
> - ASSERT(ref.flowside.side == SOCKSIDE);
> + ASSERT(ref.flowside.side == !conn->tapside);
>
> if (conn->events == CLOSED)
> return;
> diff --git a/tcp_conn.h b/tcp_conn.h
> index d280b22..5df0076 100644
> --- a/tcp_conn.h
> +++ b/tcp_conn.h
> @@ -13,6 +13,7 @@
> * struct tcp_tap_conn - Descriptor for a TCP connection (not spliced)
> * @f: Generic flow information
> * @in_epoll: Is the connection in the epoll set?
> + * @tapside: Which side of the flow faces the tap/guest interface
> * @tap_mss: MSS advertised by tap/guest, rounded to 2 ^ TCP_MSS_BITS
> * @sock: Socket descriptor number
> * @events: Connection events, implying connection states
> @@ -39,6 +40,7 @@ struct tcp_tap_conn {
> struct flow_common f;
>
> bool in_epoll :1;
> + unsigned tapside :1;
This is a bit "far" from where the bit meaning is defined (flow.h).
Perhaps, in the comment:
* @tapside: Which side (INISIDE/FWDSIDE) corresponds to the tap/guest interface
?
And this is almost too obvious to ask, but I'm not sure: why isn't this in
flow_common? I guess we'll need it for all the protocols, eventually, right?
Is it because otherwise we have 17 bits there?
>
> #define TCP_RETRANS_BITS 3
> unsigned int retrans :TCP_RETRANS_BITS;
> @@ -106,7 +108,6 @@ struct tcp_tap_conn {
> uint32_t seq_init_from_tap;
> };
>
> -#define SIDES 2
> /**
> * struct tcp_splice_conn - Descriptor for a spliced TCP connection
> * @f: Generic flow information
> diff --git a/tcp_splice.c b/tcp_splice.c
> index abe98a0..5da7021 100644
> --- a/tcp_splice.c
> +++ b/tcp_splice.c
> @@ -472,7 +472,7 @@ bool tcp_splice_conn_from_sock(const struct ctx *c,
> return false;
> }
>
> - conn = FLOW_SET_TYPE(flow, FLOW_TCP_SPLICE, tcp_splice, 0);
> + conn = FLOW_SET_TYPE(flow, FLOW_TCP_SPLICE, tcp_splice);
>
> conn->flags = af == AF_INET ? 0 : SPLICE_V6;
> conn->s[0] = s0;
--
Stefano
next prev parent reply other threads:[~2024-05-16 12:06 UTC|newest]
Thread overview: 28+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-05-14 1:03 [PATCH v5 00/19] RFC: Unified flow table David Gibson
2024-05-14 1:03 ` [PATCH v5 01/19] flow: Clarify and enforce flow state transitions David Gibson
2024-05-16 9:30 ` Stefano Brivio
[not found] ` <ZkbVxtvmP7f0aL1S@zatzit>
2024-05-17 11:00 ` Stefano Brivio
2024-05-18 6:47 ` David Gibson
2024-05-14 1:03 ` [PATCH v5 02/19] flow: Make side 0 always be the initiating side David Gibson
2024-05-16 12:06 ` Stefano Brivio [this message]
2024-05-14 1:03 ` [PATCH v5 03/19] flow: Record the pifs for each side of each flow David Gibson
2024-05-14 1:03 ` [PATCH v5 04/19] tcp: Remove interim 'tapside' field from connection David Gibson
2024-05-14 1:03 ` [PATCH v5 05/19] flow: Common data structures for tracking flow addresses David Gibson
2024-05-14 1:03 ` [PATCH v5 06/19] flow: Populate address information for initiating side David Gibson
[not found] ` <20240516202337.1b90e5f2@elisabeth>
[not found] ` <ZkbcwkdEwjGv6uwG@zatzit>
[not found] ` <20240517215845.4d09eaae@elisabeth>
2024-05-18 7:00 ` David Gibson
2024-05-14 1:03 ` [PATCH v5 07/19] flow: Populate address information for non-initiating side David Gibson
2024-05-14 1:03 ` [PATCH v5 08/19] tcp, flow: Remove redundant information, repack connection structures David Gibson
2024-05-14 1:03 ` [PATCH v5 09/19] tcp: Obtain guest address from flowside David Gibson
2024-05-14 1:03 ` [PATCH v5 10/19] tcp: Simplify endpoint validation using flowside information David Gibson
2024-05-14 1:03 ` [PATCH v5 11/19] tcp_splice: Eliminate SPLICE_V6 flag David Gibson
2024-05-14 1:03 ` [PATCH v5 12/19] tcp, flow: Replace TCP specific hash function with general flow hash David Gibson
2024-05-14 1:03 ` [PATCH v5 13/19] flow, tcp: Generalise TCP hash table to general flow hash table David Gibson
2024-05-14 1:03 ` [PATCH v5 14/19] tcp: Re-use flow hash for initial sequence number generation David Gibson
2024-05-14 1:03 ` [PATCH v5 15/19] icmp: Use flowsides as the source of truth wherever possible David Gibson
[not found] ` <20240516225350.06aebcd7@elisabeth>
[not found] ` <ZkcAHhCpx3F0SW2K@zatzit>
[not found] ` <20240517221123.1c7197a3@elisabeth>
2024-05-18 7:08 ` David Gibson
2024-05-14 1:03 ` [PATCH v5 16/19] icmp: Look up ping flows using flow hash David Gibson
2024-05-14 1:03 ` [PATCH v5 17/19] icmp: Eliminate icmp_id_map David Gibson
2024-05-14 1:03 ` [PATCH v5 18/19] flow, tcp: Flow based NAT and port forwarding for TCP David Gibson
[not found] ` <20240518001345.2d127b09@elisabeth>
2024-05-20 5:44 ` David Gibson
2024-05-14 1:03 ` [PATCH v5 19/19] flow, icmp: Use general flow forwarding rules for ICMP David Gibson
[not found] ` <20240518001408.004011b2@elisabeth>
2024-05-20 5:56 ` David Gibson
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20240516140601.3e2e697c@elisabeth \
--to=sbrivio@redhat.com \
--cc=david@gibson.dropbear.id.au \
--cc=passt-dev@passt.top \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
Code repositories for project(s) associated with this public inbox
https://passt.top/passt
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for IMAP folder(s).