public inbox for passt-dev@passt.top
 help / color / mirror / code / Atom feed
From: David Gibson <david@gibson.dropbear.id.au>
To: Stefano Brivio <sbrivio@redhat.com>, passt-dev@passt.top
Cc: David Gibson <david@gibson.dropbear.id.au>
Subject: [PATCH v3 10/15] icmp: Store ping socket information in the flow table
Date: Thu, 21 Dec 2023 18:02:32 +1100	[thread overview]
Message-ID: <20231221070237.1422557-11-david@gibson.dropbear.id.au> (raw)
In-Reply-To: <20231221070237.1422557-1-david@gibson.dropbear.id.au>

Currently icmp_id_map[][] stores information about ping sockets in a
bespoke structure.  Move the same information into a new type of flow that
lives in the flow table.  To match that change, replace the existing ICMP
timer with a flow-based timer for expiring ping sockets.  This has the
advantage that we only need to scan the active flows, not all possible ids.

For now we still index the flow table entries with icmp_id_map[][], and we
don't populate all the common flow information.  This is kind of an abuse
of the flow table, but it's a useful interim step towards full flow table
integration for ICMP.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
---
 Makefile     |   6 +--
 flow.c       |   9 ++++
 flow.h       |   4 ++
 flow_table.h |   2 +
 icmp.c       | 127 +++++++++++++++++++++++----------------------------
 icmp.h       |   2 +-
 icmp_flow.h  |  31 +++++++++++++
 passt.c      |   5 --
 8 files changed, 106 insertions(+), 80 deletions(-)
 create mode 100644 icmp_flow.h

diff --git a/Makefile b/Makefile
index af4fa87..610ab96 100644
--- a/Makefile
+++ b/Makefile
@@ -54,9 +54,9 @@ SRCS = $(PASST_SRCS) $(QRAP_SRCS)
 MANPAGES = passt.1 pasta.1 qrap.1
 
 PASST_HEADERS = arch.h arp.h checksum.h conf.h dhcp.h dhcpv6.h flow.h \
-	flow_table.h icmp.h inany.h isolation.h lineread.h log.h ndp.h \
-	netlink.h packet.h passt.h pasta.h pcap.h pif.h port_fwd.h siphash.h \
-	tap.h tcp.h tcp_conn.h tcp_splice.h udp.h util.h
+	flow_table.h icmp.h icmp_flow.h inany.h isolation.h lineread.h log.h \
+	ndp.h netlink.h packet.h passt.h pasta.h pcap.h pif.h port_fwd.h \
+	siphash.h tap.h tcp.h tcp_conn.h tcp_splice.h udp.h util.h
 HEADERS = $(PASST_HEADERS) seccomp.h
 
 C := \#include <linux/tcp.h>\nstruct tcp_info x = { .tcpi_snd_wnd = 0 };
diff --git a/flow.c b/flow.c
index e4ea931..ef146dc 100644
--- a/flow.c
+++ b/flow.c
@@ -22,6 +22,8 @@ const char *flow_type_str[] = {
 	[FLOW_TYPE_NONE]	= "<none>",
 	[FLOW_TCP]		= "TCP connection",
 	[FLOW_TCP_SPLICE]	= "TCP connection (spliced)",
+	[FLOW_PING4]		= "ICMP ping sequence",
+	[FLOW_PING6]		= "ICMPv6 ping sequence",
 };
 static_assert(ARRAY_SIZE(flow_type_str) == FLOW_NUM_TYPES,
 	      "flow_type_str[] doesn't match enum flow_type");
@@ -29,6 +31,8 @@ static_assert(ARRAY_SIZE(flow_type_str) == FLOW_NUM_TYPES,
 const uint8_t flow_proto[] = {
 	[FLOW_TCP]		= IPPROTO_TCP,
 	[FLOW_TCP_SPLICE]	= IPPROTO_TCP,
+	[FLOW_PING4]		= IPPROTO_ICMP,
+	[FLOW_PING6]		= IPPROTO_ICMPV6,
 };
 static_assert(ARRAY_SIZE(flow_proto) == FLOW_NUM_TYPES,
 	      "flow_proto[] doesn't match enum flow_type");
@@ -438,6 +442,11 @@ void flow_defer_handler(const struct ctx *c, const struct timespec *now)
 			if (!closed && timer)
 				tcp_splice_timer(c, flow);
 			break;
+		case FLOW_PING4:
+		case FLOW_PING6:
+			if (timer)
+				closed = icmp_ping_timer(c, flow, now);
+			break;
 		default:
 			/* Assume other flow types don't need any handling */
 			;
diff --git a/flow.h b/flow.h
index e1aeeed..da2a629 100644
--- a/flow.h
+++ b/flow.h
@@ -19,6 +19,10 @@ enum flow_type {
 	FLOW_TCP,
 	/* A TCP connection between a host socket and ns socket */
 	FLOW_TCP_SPLICE,
+	/* ICMP echo requests from guest to host and matching replies back */
+	FLOW_PING4,
+	/* ICMPv6 echo requests from guest to host and matching replies back */
+	FLOW_PING6,
 
 	FLOW_NUM_TYPES,
 };
diff --git a/flow_table.h b/flow_table.h
index 34fc679..091b430 100644
--- a/flow_table.h
+++ b/flow_table.h
@@ -8,6 +8,7 @@
 #define FLOW_TABLE_H
 
 #include "tcp_conn.h"
+#include "icmp_flow.h"
 
 /**
  * struct flow_free_block - Information about a block of free entries
@@ -33,6 +34,7 @@ union flow {
 	struct flow_free_block free;
 	struct tcp_tap_conn tcp;
 	struct tcp_splice_conn tcp_splice;
+	struct icmp_ping_flow ping;
 };
 
 /* Global Flow Table */
diff --git a/icmp.c b/icmp.c
index d669351..27b150d 100644
--- a/icmp.c
+++ b/icmp.c
@@ -37,24 +37,15 @@
 #include "tap.h"
 #include "log.h"
 #include "icmp.h"
+#include "flow_table.h"
 
 #define ICMP_ECHO_TIMEOUT	60 /* s, timeout for ICMP socket activity */
 #define ICMP_NUM_IDS		(1U << 16)
 
-/**
- * struct icmp_id_sock - Tracking information for single ICMP echo identifier
- * @sock:	Bound socket for identifier
- * @seq:	Last sequence number sent to tap, host order, -1: not sent yet
- * @ts:		Last associated activity from tap, seconds
- */
-struct icmp_id_sock {
-	int sock;
-	int seq;
-	time_t ts;
-};
+#define PINGF(idx)		(&(FLOW(idx)->ping))
 
 /* Indexed by ICMP echo identifier */
-static struct icmp_id_sock icmp_id_map[IP_VERSIONS][ICMP_NUM_IDS];
+static struct icmp_ping_flow *icmp_id_map[IP_VERSIONS][ICMP_NUM_IDS];
 
 /**
  * icmp_sock_handler() - Handle new data from ICMP or ICMPv6 socket
@@ -64,8 +55,8 @@ static struct icmp_id_sock icmp_id_map[IP_VERSIONS][ICMP_NUM_IDS];
  */
 void icmp_sock_handler(const struct ctx *c, int af, union epoll_ref ref)
 {
-	struct icmp_id_sock *const id_map = af == AF_INET
-		? &icmp_id_map[V4][ref.icmp.id] : &icmp_id_map[V6][ref.icmp.id];
+	struct icmp_ping_flow *pingf = af == AF_INET
+		? icmp_id_map[V4][ref.icmp.id] : icmp_id_map[V6][ref.icmp.id];
 	const char *const pname = af == AF_INET ? "ICMP" : "ICMPv6";
 	char buf[USHRT_MAX];
 	union {
@@ -80,6 +71,8 @@ void icmp_sock_handler(const struct ctx *c, int af, union epoll_ref ref)
 	if (c->no_icmp)
 		return;
 
+	ASSERT(pingf);
+
 	n = recvfrom(ref.fd, buf, sizeof(buf), 0, &sr.sa, &sl);
 	if (n < 0) {
 		warn("%s: recvfrom() error on ping socket: %s",
@@ -113,10 +106,10 @@ void icmp_sock_handler(const struct ctx *c, int af, union epoll_ref ref)
 	}
 
 	if (c->mode == MODE_PASTA) {
-		if (id_map->seq == seq)
+		if (pingf->seq == seq)
 			return;
 
-		id_map->seq = seq;
+		pingf->seq = seq;
 	}
 
 	debug("%s: echo reply to tap, ID: %"PRIu16", seq: %"PRIu16, pname,
@@ -133,16 +126,19 @@ unexpected:
 }
 
 /**
- * icmp_ping_close() - Close out and cleanup a ping sequence
+ * icmp_ping_close() - Close out and cleanup a ping flow
  * @c:		Execution context
- * @id_map:	id map entry of the sequence to close
+ * @pingf:	ping flow entry to close
  */
-static void icmp_ping_close(const struct ctx *c, struct icmp_id_sock *id_map)
+static void icmp_ping_close(const struct ctx *c, struct icmp_ping_flow *pingf)
 {
-	epoll_ctl(c->epollfd, EPOLL_CTL_DEL, id_map->sock, NULL);
-	close(id_map->sock);
-	id_map->sock = -1;
-	id_map->seq = -1;
+	epoll_ctl(c->epollfd, EPOLL_CTL_DEL, pingf->sock, NULL);
+	close(pingf->sock);
+
+	if (pingf->f.type == FLOW_PING4)
+		icmp_id_map[V4][pingf->id] = NULL;
+	else
+		icmp_id_map[V6][pingf->id] = NULL;
 }
 
 /**
@@ -152,18 +148,24 @@ static void icmp_ping_close(const struct ctx *c, struct icmp_id_sock *id_map)
  * @af:		Address family, AF_INET or AF_INET6
  * @id:		ICMP id for the new sequence
  *
- * Return: Newly opened ping socket fd, or -1 on failure
+ * Return: Newly opened ping flow, or NULL on failure
  */
-static int icmp_ping_new(const struct ctx *c, struct icmp_id_sock *id_map,
-			 int af, uint16_t id)
+static struct icmp_ping_flow *icmp_ping_new(const struct ctx *c,
+					    struct icmp_ping_flow **id_map,
+					    int af, uint16_t id)
 {
-	uint8_t proto = af == AF_INET ? IPPROTO_ICMP : IPPROTO_ICMPV6;
 	const char *const pname = af == AF_INET ? "ICMP" : "ICMPv6";
+	uint8_t flowtype = af == AF_INET ? FLOW_PING4 : FLOW_PING6;
 	union icmp_epoll_ref iref = { .id = id };
+	union flow *flow = flow_alloc();
+	struct icmp_ping_flow *pingf;
 	const void *bind_addr;
 	const char *bind_if;
 	int s;
 
+	if (!flow)
+		return NULL;
+
 	if (af == AF_INET) {
 		bind_addr = &c->ip4.addr_out;
 		bind_if = c->ip4.ifname_out;
@@ -172,7 +174,7 @@ static int icmp_ping_new(const struct ctx *c, struct icmp_id_sock *id_map,
 		bind_if = c->ip6.ifname_out;
 	}
 
-	s = sock_l4(c, af, proto, bind_addr, bind_if, 0, iref.u32);
+	s = sock_l4(c, af, flow_proto[flowtype], bind_addr, bind_if, 0, iref.u32);
 
 	if (s < 0) {
 		warn("Cannot open \"ping\" socket. You might need to:");
@@ -184,16 +186,23 @@ static int icmp_ping_new(const struct ctx *c, struct icmp_id_sock *id_map,
 	if (s > FD_REF_MAX)
 		goto cancel;
 
-	id_map->sock = s;
+	pingf = &flow->ping;
+	pingf->f.type = flowtype;
+	pingf->seq = -1;
+	pingf->sock = s;
+	pingf->id = id;
+
+	*id_map = pingf;
 
 	debug("%s: new socket %i for echo ID %"PRIu16, pname, s, id);
 
-	return s;
+	return pingf;
 
 cancel:
 	if (s >= 0)
 		close(s);
-	return -1;
+	flow_alloc_cancel(flow);
+	return NULL;
 }
 
 /**
@@ -219,11 +228,10 @@ int icmp_tap_handler(const struct ctx *c, uint8_t pif, int af,
 		struct sockaddr_in6 sa6;
 	} sa = { .sa.sa_family = af };
 	const socklen_t sl = af == AF_INET ? sizeof(sa.sa4) : sizeof(sa.sa6);
-	struct icmp_id_sock *id_map;
+	struct icmp_ping_flow *pingf, **id_map;
 	uint16_t id, seq;
 	size_t plen;
 	void *pkt;
-	int s;
 
 	(void)saddr;
 	(void)pif;
@@ -263,13 +271,13 @@ int icmp_tap_handler(const struct ctx *c, uint8_t pif, int af,
 		ASSERT(0);
 	}
 
-	if ((s = id_map->sock) < 0)
-		if ((s = icmp_ping_new(c, id_map, af, id)) < 0)
+	if (!(pingf = *id_map))
+		if (!(pingf = icmp_ping_new(c, id_map, af, id)))
 			return 1;
 
-	id_map->ts = now->tv_sec;
+	pingf->ts = now->tv_sec;
 
-	if (sendto(s, pkt, plen, MSG_NOSIGNAL, &sa.sa, sl) < 0) {
+	if (sendto(pingf->sock, pkt, plen, MSG_NOSIGNAL, &sa.sa, sl) < 0) {
 		debug("%s: failed to relay request to socket: %s",
 		      pname, strerror(errno));
 	} else {
@@ -281,44 +289,21 @@ int icmp_tap_handler(const struct ctx *c, uint8_t pif, int af,
 }
 
 /**
- * icmp_timer_one() - Handler for timed events related to a given identifier
- * @c:		Execution context
- * @id_map:	id socket information to check timers for
- * @now:	Current timestamp
- */
-static void icmp_timer_one(const struct ctx *c, struct icmp_id_sock *id_map,
-			   const struct timespec *now)
-{
-	if (id_map->sock < 0 || now->tv_sec - id_map->ts <= ICMP_ECHO_TIMEOUT)
-		return;
-
-	icmp_ping_close(c, id_map);
-}
-
-/**
- * icmp_timer() - Scan activity bitmap for identifiers with timed events
+ * icmp_ping_timer() - Handler for timed events related to a given flow
  * @c:		Execution context
+ * @flow:	flow table entry to check for timeout
  * @now:	Current timestamp
+ *
+ * Return: true if the flow is ready to free, false otherwise
  */
-void icmp_timer(const struct ctx *c, const struct timespec *now)
+bool icmp_ping_timer(const struct ctx *c, union flow *flow,
+		     const struct timespec *now)
 {
-	unsigned int i;
-
-	for (i = 0; i < ICMP_NUM_IDS; i++) {
-		icmp_timer_one(c, &icmp_id_map[V4][i], now);
-		icmp_timer_one(c, &icmp_id_map[V6][i], now);
-	}
-}
+	struct icmp_ping_flow *pingf = &flow->ping;
 
-/**
- * icmp_init() - Initialise sequences in ID map to -1 (no sequence sent yet)
- */
-void icmp_init(void)
-{
-	unsigned i;
+	if (now->tv_sec - pingf->ts <= ICMP_ECHO_TIMEOUT)
+		return false;
 
-	for (i = 0; i < ICMP_NUM_IDS; i++) {
-		icmp_id_map[V4][i].seq = icmp_id_map[V6][i].seq = -1;
-		icmp_id_map[V4][i].sock = icmp_id_map[V6][i].sock = -1;
-	}
+	icmp_ping_close(c, pingf);
+	return true;
 }
diff --git a/icmp.h b/icmp.h
index 0083597..2897f31 100644
--- a/icmp.h
+++ b/icmp.h
@@ -9,12 +9,12 @@
 #define ICMP_TIMER_INTERVAL		10000 /* ms */
 
 struct ctx;
+struct icmp_ping_flow;
 
 void icmp_sock_handler(const struct ctx *c, int af, union epoll_ref ref);
 int icmp_tap_handler(const struct ctx *c, uint8_t pif, int af,
 		     const void *saddr, const void *daddr,
 		     const struct pool *p, const struct timespec *now);
-void icmp_timer(const struct ctx *c, const struct timespec *now);
 void icmp_init(void);
 
 /**
diff --git a/icmp_flow.h b/icmp_flow.h
new file mode 100644
index 0000000..7b7f4ee
--- /dev/null
+++ b/icmp_flow.h
@@ -0,0 +1,31 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later
+ * Copyright Red Hat
+ * Author: David Gibson <david@gibson.dropbear.id.au>
+ *
+ * ICMP flow tracking data structures
+ */
+#ifndef ICMP_FLOW_H
+#define ICMP_FLOW_H
+
+/**
+ * struct icmp_ping_flow - Descriptor for a flow of ping requests/replies
+ * @f:		Generic flow information
+ * @seq:	Last sequence number sent to tap, host order, -1: not sent yet
+ * @sock:	"ping" socket
+ * @ts:		Last associated activity from tap, seconds
+ * @id:		Guest side ICMP ping id for this flow
+ */
+struct icmp_ping_flow {
+	/* Must be first element */
+	struct flow_common f;
+
+	int seq;
+	int sock;
+	time_t ts;
+	uint16_t id;
+};
+
+bool icmp_ping_timer(const struct ctx *c, union flow *flow,
+		     const struct timespec *now);
+
+#endif /* ICMP_FLOW_H */
diff --git a/passt.c b/passt.c
index 44d3a0b..9191404 100644
--- a/passt.c
+++ b/passt.c
@@ -105,8 +105,6 @@ static void post_handler(struct ctx *c, const struct timespec *now)
 	CALL_PROTO_HANDLER(c, now, tcp, TCP);
 	/* NOLINTNEXTLINE(bugprone-branch-clone): intervals can be the same */
 	CALL_PROTO_HANDLER(c, now, udp, UDP);
-	/* NOLINTNEXTLINE(bugprone-branch-clone): intervals can be the same */
-	CALL_PROTO_HANDLER(c, now, icmp, ICMP);
 
 	flow_defer_handler(c, now);
 #undef CALL_PROTO_HANDLER
@@ -290,9 +288,6 @@ int main(int argc, char **argv)
 	if ((!c.no_udp && udp_init(&c)) || (!c.no_tcp && tcp_init(&c)))
 		exit(EXIT_FAILURE);
 
-	if (!c.no_icmp)
-		icmp_init();
-
 	proto_update_l2_buf(c.mac_guest, c.mac);
 
 	if (c.ifi4 && !c.no_dhcp)
-- 
@@ -105,8 +105,6 @@ static void post_handler(struct ctx *c, const struct timespec *now)
 	CALL_PROTO_HANDLER(c, now, tcp, TCP);
 	/* NOLINTNEXTLINE(bugprone-branch-clone): intervals can be the same */
 	CALL_PROTO_HANDLER(c, now, udp, UDP);
-	/* NOLINTNEXTLINE(bugprone-branch-clone): intervals can be the same */
-	CALL_PROTO_HANDLER(c, now, icmp, ICMP);
 
 	flow_defer_handler(c, now);
 #undef CALL_PROTO_HANDLER
@@ -290,9 +288,6 @@ int main(int argc, char **argv)
 	if ((!c.no_udp && udp_init(&c)) || (!c.no_tcp && tcp_init(&c)))
 		exit(EXIT_FAILURE);
 
-	if (!c.no_icmp)
-		icmp_init();
-
 	proto_update_l2_buf(c.mac_guest, c.mac);
 
 	if (c.ifi4 && !c.no_dhcp)
-- 
2.43.0


  parent reply	other threads:[~2023-12-21  7:02 UTC|newest]

Thread overview: 33+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-12-21  7:02 [PATCH v3 00/15] RFC: Unified flow table David Gibson
2023-12-21  7:02 ` [PATCH v3 01/15] flow: Common data structures for tracking flow addresses David Gibson
2024-01-13 22:50   ` Stefano Brivio
2024-01-16  6:14     ` David Gibson
2023-12-21  7:02 ` [PATCH v3 02/15] tcp, flow: Maintain guest side flow information David Gibson
2024-01-13 22:51   ` Stefano Brivio
2024-01-16  6:23     ` David Gibson
2023-12-21  7:02 ` [PATCH v3 03/15] tcp, flow: Maintain host " David Gibson
2023-12-21  7:02 ` [PATCH v3 04/15] tcp_splice,flow: Maintain flow information for spliced connections David Gibson
2024-01-17 19:59   ` Stefano Brivio
2024-01-18  1:01     ` David Gibson
2023-12-21  7:02 ` [PATCH v3 05/15] flow, tcp, tcp_splice: Uniform debug helpers for new flows David Gibson
2024-01-17 19:59   ` Stefano Brivio
2024-01-18  1:04     ` David Gibson
2024-01-18 15:40       ` Stefano Brivio
2023-12-21  7:02 ` [PATCH v3 06/15] tcp, flow: Replace TCP specific hash function with general flow hash David Gibson
2024-01-17 19:59   ` Stefano Brivio
2024-01-18  1:15     ` David Gibson
2024-01-18 15:42       ` Stefano Brivio
2024-01-18 23:55         ` David Gibson
2023-12-21  7:02 ` [PATCH v3 07/15] flow: Add helper to determine a flow's protocol David Gibson
2023-12-21  7:02 ` [PATCH v3 08/15] flow, tcp: Generalise TCP hash table to general flow hash table David Gibson
2023-12-21  7:02 ` [PATCH v3 09/15] tcp: Re-use flow hash for initial sequence number generation David Gibson
2023-12-21  7:02 ` David Gibson [this message]
2023-12-21  7:02 ` [PATCH v3 11/15] icmp: Populate guest side information for ping flows David Gibson
2023-12-21  7:02 ` [PATCH v3 12/15] icmp: Populate and use host side flow information David Gibson
2024-01-17 19:59   ` Stefano Brivio
2024-01-18  1:22     ` David Gibson
2024-01-18 15:43       ` Stefano Brivio
2024-01-18 23:58         ` David Gibson
2023-12-21  7:02 ` [PATCH v3 13/15] icmp: Use 'flowside' epoll references for ping sockets David Gibson
2023-12-21  7:02 ` [PATCH v3 14/15] icmp: Merge EPOLL_TYPE_ICMP and EPOLL_TYPE_ICMPV6 David Gibson
2023-12-21  7:02 ` [PATCH v3 15/15] icmp: Eliminate icmp_id_map David Gibson

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20231221070237.1422557-11-david@gibson.dropbear.id.au \
    --to=david@gibson.dropbear.id.au \
    --cc=passt-dev@passt.top \
    --cc=sbrivio@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
Code repositories for project(s) associated with this public inbox

	https://passt.top/passt

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for IMAP folder(s).