public inbox for passt-dev@passt.top
 help / color / mirror / code / Atom feed
From: David Gibson <david@gibson.dropbear.id.au>
To: Stefano Brivio <sbrivio@redhat.com>
Cc: passt-dev@passt.top
Subject: Re: [PATCH 1/3] icmp: Store ping socket information in flow table
Date: Thu, 7 Mar 2024 21:24:28 +1100	[thread overview]
Message-ID: <ZemV3Ns925Ry32jF@zatzit> (raw)
In-Reply-To: <20240307095315.36eeef59@elisabeth>

[-- Attachment #1: Type: text/plain, Size: 9683 bytes --]

On Thu, Mar 07, 2024 at 09:53:15AM +0100, Stefano Brivio wrote:
> Apologies for the delay, I'm still reviewing 3/3, but I have a question
> meanwhile:
> 
> On Thu, 29 Feb 2024 15:15:32 +1100
> David Gibson <david@gibson.dropbear.id.au> wrote:
> 
> > Currently icmp_id_map[][] stores information about ping sockets in a
> > bespoke structure.  Move the same information into new types of flow
> > in the flow table.  To match that change, replace the existing ICMP
> > timer with a flow-based timer for expiring ping sockets.  This has the
> > advantage that we only need to scan the active flows, not all possible
> > ids.
> > 
> > We convert icmp_id_map[][] to point to the flow table entries, rather
> > than containing its own information.  We do still use that array for
> > locating the right ping flows, rather than using a "flow native" form
> > of lookup for the time being.
> > 
> > Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
> > ---
> >  Makefile     |   6 +--
> >  flow.c       |   9 ++++
> >  flow.h       |   4 ++
> >  flow_table.h |   2 +
> >  icmp.c       | 143 +++++++++++++++++++++++----------------------------
> >  icmp.h       |   2 +-
> >  icmp_flow.h  |  31 +++++++++++
> >  passt.c      |   5 --
> >  8 files changed, 115 insertions(+), 87 deletions(-)
> >  create mode 100644 icmp_flow.h
> > 
> > diff --git a/Makefile b/Makefile
> > index 2d6a5155..47fc5448 100644
> > --- a/Makefile
> > +++ b/Makefile
> > @@ -54,9 +54,9 @@ SRCS = $(PASST_SRCS) $(QRAP_SRCS)
> >  MANPAGES = passt.1 pasta.1 qrap.1
> >  
> >  PASST_HEADERS = arch.h arp.h checksum.h conf.h dhcp.h dhcpv6.h flow.h fwd.h \
> > -	flow_table.h icmp.h inany.h iov.h isolation.h lineread.h log.h ndp.h \
> > -	netlink.h packet.h passt.h pasta.h pcap.h pif.h siphash.h tap.h tcp.h \
> > -	tcp_conn.h tcp_splice.h udp.h util.h
> > +	flow_table.h icmp.h icmp_flow.h inany.h iov.h isolation.h lineread.h \
> > +	log.h ndp.h netlink.h packet.h passt.h pasta.h pcap.h pif.h siphash.h \
> > +	tap.h tcp.h tcp_conn.h tcp_splice.h udp.h util.h
> >  HEADERS = $(PASST_HEADERS) seccomp.h
> >  
> >  C := \#include <linux/tcp.h>\nstruct tcp_info x = { .tcpi_snd_wnd = 0 };
> > diff --git a/flow.c b/flow.c
> > index d7974d59..5835d6c0 100644
> > --- a/flow.c
> > +++ b/flow.c
> > @@ -21,6 +21,8 @@ const char *flow_type_str[] = {
> >  	[FLOW_TYPE_NONE]	= "<none>",
> >  	[FLOW_TCP]		= "TCP connection",
> >  	[FLOW_TCP_SPLICE]	= "TCP connection (spliced)",
> > +	[FLOW_PING4]		= "ICMP ping sequence",
> > +	[FLOW_PING6]		= "ICMPv6 ping sequence",
> >  };
> >  static_assert(ARRAY_SIZE(flow_type_str) == FLOW_NUM_TYPES,
> >  	      "flow_type_str[] doesn't match enum flow_type");
> > @@ -28,6 +30,8 @@ static_assert(ARRAY_SIZE(flow_type_str) == FLOW_NUM_TYPES,
> >  const uint8_t flow_proto[] = {
> >  	[FLOW_TCP]		= IPPROTO_TCP,
> >  	[FLOW_TCP_SPLICE]	= IPPROTO_TCP,
> > +	[FLOW_PING4]		= IPPROTO_ICMP,
> > +	[FLOW_PING6]		= IPPROTO_ICMPV6,
> >  };
> >  static_assert(ARRAY_SIZE(flow_proto) == FLOW_NUM_TYPES,
> >  	      "flow_proto[] doesn't match enum flow_type");
> > @@ -294,6 +298,11 @@ void flow_defer_handler(const struct ctx *c, const struct timespec *now)
> >  			if (!closed && timer)
> >  				tcp_splice_timer(c, flow);
> >  			break;
> > +		case FLOW_PING4:
> > +		case FLOW_PING6:
> > +			if (timer)
> > +				closed = icmp_ping_timer(c, flow, now);
> > +			break;
> >  		default:
> >  			/* Assume other flow types don't need any handling */
> >  			;
> > diff --git a/flow.h b/flow.h
> > index 8b66751b..c943c441 100644
> > --- a/flow.h
> > +++ b/flow.h
> > @@ -19,6 +19,10 @@ enum flow_type {
> >  	FLOW_TCP,
> >  	/* A TCP connection between a host socket and ns socket */
> >  	FLOW_TCP_SPLICE,
> > +	/* ICMP echo requests from guest to host and matching replies back */
> > +	FLOW_PING4,
> > +	/* ICMPv6 echo requests from guest to host and matching replies back */
> > +	FLOW_PING6,
> >  
> >  	FLOW_NUM_TYPES,
> >  };
> > diff --git a/flow_table.h b/flow_table.h
> > index eecf8844..b7e5529a 100644
> > --- a/flow_table.h
> > +++ b/flow_table.h
> > @@ -8,6 +8,7 @@
> >  #define FLOW_TABLE_H
> >  
> >  #include "tcp_conn.h"
> > +#include "icmp_flow.h"
> >  
> >  /**
> >   * struct flow_free_cluster - Information about a cluster of free entries
> > @@ -33,6 +34,7 @@ union flow {
> >  	struct flow_free_cluster free;
> >  	struct tcp_tap_conn tcp;
> >  	struct tcp_splice_conn tcp_splice;
> > +	struct icmp_ping_flow ping;
> >  };
> >  
> >  /* Global Flow Table */
> > diff --git a/icmp.c b/icmp.c
> > index fb2fcafc..1caf791d 100644
> > --- a/icmp.c
> > +++ b/icmp.c
> > @@ -39,24 +39,17 @@
> >  #include "siphash.h"
> >  #include "inany.h"
> >  #include "icmp.h"
> > +#include "flow_table.h"
> >  
> >  #define ICMP_ECHO_TIMEOUT	60 /* s, timeout for ICMP socket activity */
> >  #define ICMP_NUM_IDS		(1U << 16)
> >  
> > -/**
> > - * struct icmp_id_sock - Tracking information for single ICMP echo identifier
> > - * @sock:	Bound socket for identifier
> > - * @seq:	Last sequence number sent to tap, host order, -1: not sent yet
> > - * @ts:		Last associated activity from tap, seconds
> > - */
> > -struct icmp_id_sock {
> > -	int sock;
> > -	int seq;
> > -	time_t ts;
> > -};
> > +/* Sides of a flow as we use them for ping streams */
> > +#define	SOCKSIDE	0
> > +#define	TAPSIDE		1
> >  
> >  /* Indexed by ICMP echo identifier */
> > -static struct icmp_id_sock icmp_id_map[IP_VERSIONS][ICMP_NUM_IDS];
> > +static struct icmp_ping_flow *icmp_id_map[IP_VERSIONS][ICMP_NUM_IDS];
> >  
> >  /**
> >   * icmp_sock_handler() - Handle new data from ICMP or ICMPv6 socket
> > @@ -66,8 +59,8 @@ static struct icmp_id_sock icmp_id_map[IP_VERSIONS][ICMP_NUM_IDS];
> >   */
> >  void icmp_sock_handler(const struct ctx *c, sa_family_t af, union epoll_ref ref)
> >  {
> > -	struct icmp_id_sock *const id_sock = af == AF_INET
> > -		? &icmp_id_map[V4][ref.icmp.id] : &icmp_id_map[V6][ref.icmp.id];
> > +	struct icmp_ping_flow *pingf = af == AF_INET
> > +		? icmp_id_map[V4][ref.icmp.id] : icmp_id_map[V6][ref.icmp.id];
> >  	const char *const pname = af == AF_INET ? "ICMP" : "ICMPv6";
> >  	union sockaddr_inany sr;
> >  	socklen_t sl = sizeof(sr);
> > @@ -78,6 +71,8 @@ void icmp_sock_handler(const struct ctx *c, sa_family_t af, union epoll_ref ref)
> >  	if (c->no_icmp)
> >  		return;
> >  
> > +	ASSERT(pingf);
> > +
> >  	n = recvfrom(ref.fd, buf, sizeof(buf), 0, &sr.sa, &sl);
> >  	if (n < 0) {
> >  		warn("%s: recvfrom() error on ping socket: %s",
> > @@ -112,10 +107,10 @@ void icmp_sock_handler(const struct ctx *c, sa_family_t af, union epoll_ref ref)
> >  
> >  	/* In PASTA mode, we'll get any reply we send, discard them. */
> >  	if (c->mode == MODE_PASTA) {
> > -		if (id_sock->seq == seq)
> > +		if (pingf->seq == seq)
> >  			return;
> >  
> > -		id_sock->seq = seq;
> > +		pingf->seq = seq;
> >  	}
> >  
> >  	debug("%s: echo reply to tap, ID: %"PRIu16", seq: %"PRIu16, pname,
> > @@ -132,16 +127,22 @@ unexpected:
> >  }
> >  
> >  /**
> > - * icmp_ping_close() - Close and clean up a ping socket
> > + * icmp_ping_close() - Close and clean up a ping flow
> >   * @c:		Execution context
> > - * @id_sock:	Socket number and other info
> > + * @pingf:	ping flow entry to close
> >   */
> > -static void icmp_ping_close(const struct ctx *c, struct icmp_id_sock *id_sock)
> > +static void icmp_ping_close(const struct ctx *c,
> > +			    const struct icmp_ping_flow *pingf)
> >  {
> > -	epoll_ctl(c->epollfd, EPOLL_CTL_DEL, id_sock->sock, NULL);
> > -	close(id_sock->sock);
> > -	id_sock->sock = -1;
> > -	id_sock->seq = -1;
> > +	uint16_t id = pingf->id;
> > +
> > +	epoll_ctl(c->epollfd, EPOLL_CTL_DEL, pingf->sock, NULL);
> > +	close(pingf->sock);
> > +
> > +	if (pingf->f.type == FLOW_PING4)
> > +		icmp_id_map[V4][id] = NULL;
> > +	else
> > +		icmp_id_map[V6][id] = NULL;
> >  }
> >  
> >  /**
> > @@ -151,17 +152,27 @@ static void icmp_ping_close(const struct ctx *c, struct icmp_id_sock *id_sock)
> >   * @af:		Address family, AF_INET or AF_INET6
> >   * @id:		ICMP id for the new socket
> >   *
> > - * Return: Newly opened ping socket fd, or -1 on failure
> > + * Return: Newly opened ping flow, or NULL on failure
> >   */
> > -static int icmp_ping_new(const struct ctx *c, struct icmp_id_sock *id_sock,
> > -			 sa_family_t af, uint16_t id)
> > +static struct icmp_ping_flow *icmp_ping_new(const struct ctx *c,
> > +					    struct icmp_ping_flow **id_sock,
> 
> I'm not quite sure why we still need id_sock passed as parameter, and
> what it's supposed to contain (you haven't updated the function
> comment).

Oops.

> Now that all the information is encapsulated in the flow, and you
> return the new flow, with a trivial change in icmp_tap_handler(),
> couldn't we just drop id_sock here?

No, because this is only the "partial" flow table implementation,
lacking the address information in the common part of the flow.
Without that, while the information on a single flow is in the flow
table, we still need the icmp_id_map[] arrays to *find* the relevant
flow.  id_sock passed here is the relevant "slot" in those arrays, so
we can update them to index the new flow (that's why it's a (ping_flow
**) not a (ping_flow *)).

The "full" flow implementation removes those tables, and this
parameter.

-- 
David Gibson			| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you.  NOT _the_ _other_
				| _way_ _around_!
http://www.ozlabs.org/~dgibson

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

  reply	other threads:[~2024-03-07 10:24 UTC|newest]

Thread overview: 12+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-02-29  4:15 [PATCH 0/3] Basic integration of ICMP with flow table David Gibson
2024-02-29  4:15 ` [PATCH 1/3] icmp: Store ping socket information in " David Gibson
2024-03-07  8:53   ` Stefano Brivio
2024-03-07 10:24     ` David Gibson [this message]
2024-03-07 23:25       ` Stefano Brivio
2024-03-08  0:48         ` David Gibson
2024-02-29  4:15 ` [PATCH 2/3] icmp: Flow based error reporting David Gibson
2024-03-07 23:26   ` Stefano Brivio
2024-03-08  0:49     ` David Gibson
2024-03-08  6:06       ` Stefano Brivio
2024-02-29  4:15 ` [PATCH 3/3] icmp: Use 'flowside' epoll references for ping sockets David Gibson
2024-03-13 10:08 ` [PATCH 0/3] Basic integration of ICMP with flow table Stefano Brivio

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=ZemV3Ns925Ry32jF@zatzit \
    --to=david@gibson.dropbear.id.au \
    --cc=passt-dev@passt.top \
    --cc=sbrivio@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
Code repositories for project(s) associated with this public inbox

	https://passt.top/passt

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for IMAP folder(s).