public inbox for passt-dev@passt.top
 help / color / mirror / code / Atom feed
* [PATCH 0/3] Basic integration of ICMP with flow table
@ 2024-02-29  4:15 David Gibson
  2024-02-29  4:15 ` [PATCH 1/3] icmp: Store ping socket information in " David Gibson
                   ` (3 more replies)
  0 siblings, 4 replies; 12+ messages in thread
From: David Gibson @ 2024-02-29  4:15 UTC (permalink / raw)
  To: Stefano Brivio, passt-dev; +Cc: David Gibson

I realized that even with the rudimentary form of the flow table we
have now, it's possible to integrate ICMP (ping).  This even
simplifies a little bit of logic in the ICMP code.

This is based on the address handling clean up series.

David Gibson (3):
  icmp: Store ping socket information in flow table
  icmp: Flow based error reporting
  icmp: Use 'flowside' epoll references for ping sockets

 Makefile     |   6 +-
 flow.c       |   9 +++
 flow.h       |   4 ++
 flow_table.h |   2 +
 icmp.c       | 194 +++++++++++++++++++++++----------------------------
 icmp.h       |  15 +---
 icmp_flow.h  |  31 ++++++++
 passt.c      |  15 +---
 passt.h      |   7 +-
 util.c       |   4 +-
 10 files changed, 146 insertions(+), 141 deletions(-)
 create mode 100644 icmp_flow.h

-- 
2.44.0


^ permalink raw reply	[flat|nested] 12+ messages in thread

* [PATCH 1/3] icmp: Store ping socket information in flow table
  2024-02-29  4:15 [PATCH 0/3] Basic integration of ICMP with flow table David Gibson
@ 2024-02-29  4:15 ` David Gibson
  2024-03-07  8:53   ` Stefano Brivio
  2024-02-29  4:15 ` [PATCH 2/3] icmp: Flow based error reporting David Gibson
                   ` (2 subsequent siblings)
  3 siblings, 1 reply; 12+ messages in thread
From: David Gibson @ 2024-02-29  4:15 UTC (permalink / raw)
  To: Stefano Brivio, passt-dev; +Cc: David Gibson

Currently icmp_id_map[][] stores information about ping sockets in a
bespoke structure.  Move the same information into new types of flow
in the flow table.  To match that change, replace the existing ICMP
timer with a flow-based timer for expiring ping sockets.  This has the
advantage that we only need to scan the active flows, not all possible
ids.

We convert icmp_id_map[][] to point to the flow table entries, rather
than containing its own information.  We do still use that array for
locating the right ping flows, rather than using a "flow native" form
of lookup for the time being.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
---
 Makefile     |   6 +--
 flow.c       |   9 ++++
 flow.h       |   4 ++
 flow_table.h |   2 +
 icmp.c       | 143 +++++++++++++++++++++++----------------------------
 icmp.h       |   2 +-
 icmp_flow.h  |  31 +++++++++++
 passt.c      |   5 --
 8 files changed, 115 insertions(+), 87 deletions(-)
 create mode 100644 icmp_flow.h

diff --git a/Makefile b/Makefile
index 2d6a5155..47fc5448 100644
--- a/Makefile
+++ b/Makefile
@@ -54,9 +54,9 @@ SRCS = $(PASST_SRCS) $(QRAP_SRCS)
 MANPAGES = passt.1 pasta.1 qrap.1
 
 PASST_HEADERS = arch.h arp.h checksum.h conf.h dhcp.h dhcpv6.h flow.h fwd.h \
-	flow_table.h icmp.h inany.h iov.h isolation.h lineread.h log.h ndp.h \
-	netlink.h packet.h passt.h pasta.h pcap.h pif.h siphash.h tap.h tcp.h \
-	tcp_conn.h tcp_splice.h udp.h util.h
+	flow_table.h icmp.h icmp_flow.h inany.h iov.h isolation.h lineread.h \
+	log.h ndp.h netlink.h packet.h passt.h pasta.h pcap.h pif.h siphash.h \
+	tap.h tcp.h tcp_conn.h tcp_splice.h udp.h util.h
 HEADERS = $(PASST_HEADERS) seccomp.h
 
 C := \#include <linux/tcp.h>\nstruct tcp_info x = { .tcpi_snd_wnd = 0 };
diff --git a/flow.c b/flow.c
index d7974d59..5835d6c0 100644
--- a/flow.c
+++ b/flow.c
@@ -21,6 +21,8 @@ const char *flow_type_str[] = {
 	[FLOW_TYPE_NONE]	= "<none>",
 	[FLOW_TCP]		= "TCP connection",
 	[FLOW_TCP_SPLICE]	= "TCP connection (spliced)",
+	[FLOW_PING4]		= "ICMP ping sequence",
+	[FLOW_PING6]		= "ICMPv6 ping sequence",
 };
 static_assert(ARRAY_SIZE(flow_type_str) == FLOW_NUM_TYPES,
 	      "flow_type_str[] doesn't match enum flow_type");
@@ -28,6 +30,8 @@ static_assert(ARRAY_SIZE(flow_type_str) == FLOW_NUM_TYPES,
 const uint8_t flow_proto[] = {
 	[FLOW_TCP]		= IPPROTO_TCP,
 	[FLOW_TCP_SPLICE]	= IPPROTO_TCP,
+	[FLOW_PING4]		= IPPROTO_ICMP,
+	[FLOW_PING6]		= IPPROTO_ICMPV6,
 };
 static_assert(ARRAY_SIZE(flow_proto) == FLOW_NUM_TYPES,
 	      "flow_proto[] doesn't match enum flow_type");
@@ -294,6 +298,11 @@ void flow_defer_handler(const struct ctx *c, const struct timespec *now)
 			if (!closed && timer)
 				tcp_splice_timer(c, flow);
 			break;
+		case FLOW_PING4:
+		case FLOW_PING6:
+			if (timer)
+				closed = icmp_ping_timer(c, flow, now);
+			break;
 		default:
 			/* Assume other flow types don't need any handling */
 			;
diff --git a/flow.h b/flow.h
index 8b66751b..c943c441 100644
--- a/flow.h
+++ b/flow.h
@@ -19,6 +19,10 @@ enum flow_type {
 	FLOW_TCP,
 	/* A TCP connection between a host socket and ns socket */
 	FLOW_TCP_SPLICE,
+	/* ICMP echo requests from guest to host and matching replies back */
+	FLOW_PING4,
+	/* ICMPv6 echo requests from guest to host and matching replies back */
+	FLOW_PING6,
 
 	FLOW_NUM_TYPES,
 };
diff --git a/flow_table.h b/flow_table.h
index eecf8844..b7e5529a 100644
--- a/flow_table.h
+++ b/flow_table.h
@@ -8,6 +8,7 @@
 #define FLOW_TABLE_H
 
 #include "tcp_conn.h"
+#include "icmp_flow.h"
 
 /**
  * struct flow_free_cluster - Information about a cluster of free entries
@@ -33,6 +34,7 @@ union flow {
 	struct flow_free_cluster free;
 	struct tcp_tap_conn tcp;
 	struct tcp_splice_conn tcp_splice;
+	struct icmp_ping_flow ping;
 };
 
 /* Global Flow Table */
diff --git a/icmp.c b/icmp.c
index fb2fcafc..1caf791d 100644
--- a/icmp.c
+++ b/icmp.c
@@ -39,24 +39,17 @@
 #include "siphash.h"
 #include "inany.h"
 #include "icmp.h"
+#include "flow_table.h"
 
 #define ICMP_ECHO_TIMEOUT	60 /* s, timeout for ICMP socket activity */
 #define ICMP_NUM_IDS		(1U << 16)
 
-/**
- * struct icmp_id_sock - Tracking information for single ICMP echo identifier
- * @sock:	Bound socket for identifier
- * @seq:	Last sequence number sent to tap, host order, -1: not sent yet
- * @ts:		Last associated activity from tap, seconds
- */
-struct icmp_id_sock {
-	int sock;
-	int seq;
-	time_t ts;
-};
+/* Sides of a flow as we use them for ping streams */
+#define	SOCKSIDE	0
+#define	TAPSIDE		1
 
 /* Indexed by ICMP echo identifier */
-static struct icmp_id_sock icmp_id_map[IP_VERSIONS][ICMP_NUM_IDS];
+static struct icmp_ping_flow *icmp_id_map[IP_VERSIONS][ICMP_NUM_IDS];
 
 /**
  * icmp_sock_handler() - Handle new data from ICMP or ICMPv6 socket
@@ -66,8 +59,8 @@ static struct icmp_id_sock icmp_id_map[IP_VERSIONS][ICMP_NUM_IDS];
  */
 void icmp_sock_handler(const struct ctx *c, sa_family_t af, union epoll_ref ref)
 {
-	struct icmp_id_sock *const id_sock = af == AF_INET
-		? &icmp_id_map[V4][ref.icmp.id] : &icmp_id_map[V6][ref.icmp.id];
+	struct icmp_ping_flow *pingf = af == AF_INET
+		? icmp_id_map[V4][ref.icmp.id] : icmp_id_map[V6][ref.icmp.id];
 	const char *const pname = af == AF_INET ? "ICMP" : "ICMPv6";
 	union sockaddr_inany sr;
 	socklen_t sl = sizeof(sr);
@@ -78,6 +71,8 @@ void icmp_sock_handler(const struct ctx *c, sa_family_t af, union epoll_ref ref)
 	if (c->no_icmp)
 		return;
 
+	ASSERT(pingf);
+
 	n = recvfrom(ref.fd, buf, sizeof(buf), 0, &sr.sa, &sl);
 	if (n < 0) {
 		warn("%s: recvfrom() error on ping socket: %s",
@@ -112,10 +107,10 @@ void icmp_sock_handler(const struct ctx *c, sa_family_t af, union epoll_ref ref)
 
 	/* In PASTA mode, we'll get any reply we send, discard them. */
 	if (c->mode == MODE_PASTA) {
-		if (id_sock->seq == seq)
+		if (pingf->seq == seq)
 			return;
 
-		id_sock->seq = seq;
+		pingf->seq = seq;
 	}
 
 	debug("%s: echo reply to tap, ID: %"PRIu16", seq: %"PRIu16, pname,
@@ -132,16 +127,22 @@ unexpected:
 }
 
 /**
- * icmp_ping_close() - Close and clean up a ping socket
+ * icmp_ping_close() - Close and clean up a ping flow
  * @c:		Execution context
- * @id_sock:	Socket number and other info
+ * @pingf:	ping flow entry to close
  */
-static void icmp_ping_close(const struct ctx *c, struct icmp_id_sock *id_sock)
+static void icmp_ping_close(const struct ctx *c,
+			    const struct icmp_ping_flow *pingf)
 {
-	epoll_ctl(c->epollfd, EPOLL_CTL_DEL, id_sock->sock, NULL);
-	close(id_sock->sock);
-	id_sock->sock = -1;
-	id_sock->seq = -1;
+	uint16_t id = pingf->id;
+
+	epoll_ctl(c->epollfd, EPOLL_CTL_DEL, pingf->sock, NULL);
+	close(pingf->sock);
+
+	if (pingf->f.type == FLOW_PING4)
+		icmp_id_map[V4][id] = NULL;
+	else
+		icmp_id_map[V6][id] = NULL;
 }
 
 /**
@@ -151,17 +152,27 @@ static void icmp_ping_close(const struct ctx *c, struct icmp_id_sock *id_sock)
  * @af:		Address family, AF_INET or AF_INET6
  * @id:		ICMP id for the new socket
  *
- * Return: Newly opened ping socket fd, or -1 on failure
+ * Return: Newly opened ping flow, or NULL on failure
  */
-static int icmp_ping_new(const struct ctx *c, struct icmp_id_sock *id_sock,
-			 sa_family_t af, uint16_t id)
+static struct icmp_ping_flow *icmp_ping_new(const struct ctx *c,
+					    struct icmp_ping_flow **id_sock,
+					    sa_family_t af, uint16_t id)
 {
-	uint8_t proto = af == AF_INET ? IPPROTO_ICMP : IPPROTO_ICMPV6;
 	const char *const pname = af == AF_INET ? "ICMP" : "ICMPv6";
+	uint8_t flowtype = af == AF_INET ? FLOW_PING4 : FLOW_PING6;
 	union icmp_epoll_ref iref = { .id = id };
+	union flow *flow = flow_alloc();
+	struct icmp_ping_flow *pingf;
 	const void *bind_addr;
 	const char *bind_if;
-	int s;
+
+	if (!flow)
+		return NULL;
+
+	pingf = FLOW_START(flow, flowtype, ping, TAPSIDE);
+
+	pingf->seq = -1;
+	pingf->id = id;
 
 	if (af == AF_INET) {
 		bind_addr = &c->ip4.addr_out;
@@ -171,28 +182,28 @@ static int icmp_ping_new(const struct ctx *c, struct icmp_id_sock *id_sock,
 		bind_if = c->ip6.ifname_out;
 	}
 
-	s = sock_l4(c, af, proto, bind_addr, bind_if, 0, iref.u32);
+	pingf->sock = sock_l4(c, af, flow_proto[flowtype], bind_addr, bind_if,
+			      0, iref.u32);
 
-	if (s < 0) {
+	if (pingf->sock < 0) {
 		warn("Cannot open \"ping\" socket. You might need to:");
 		warn("  sysctl -w net.ipv4.ping_group_range=\"0 2147483647\"");
 		warn("...echo requests/replies will fail.");
 		goto cancel;
 	}
 
-	if (s > FD_REF_MAX)
+	if (pingf->sock > FD_REF_MAX)
 		goto cancel;
 
-	id_sock->sock = s;
+	*id_sock = pingf;
 
-	debug("%s: new socket %i for echo ID %"PRIu16, pname, s, id);
+	debug("%s: new socket %i for echo ID %"PRIu16, pname, pingf->sock, id);
 
-	return s;
+	return pingf;
 
 cancel:
-	if (s >= 0)
-		close(s);
-	return -1;
+	flow_alloc_cancel(flow);
+	return NULL;
 }
 
 /**
@@ -214,14 +225,13 @@ int icmp_tap_handler(const struct ctx *c, uint8_t pif, sa_family_t af,
 	const char *const pname = af == AF_INET ? "ICMP" : "ICMPv6";
 	union sockaddr_inany sa = { .sa_family = af };
 	const socklen_t sl = af == AF_INET ? sizeof(sa.sa4) : sizeof(sa.sa6);
-	struct icmp_id_sock *id_sock;
+	struct icmp_ping_flow *pingf, **id_sock;
 	uint16_t id, seq;
 	size_t plen;
 	void *pkt;
-	int s;
 
 	(void)saddr;
-	(void)pif;
+	ASSERT(pif == PIF_TAP);
 
 	if (af == AF_INET) {
 		const struct icmphdr *ih;
@@ -260,13 +270,13 @@ int icmp_tap_handler(const struct ctx *c, uint8_t pif, sa_family_t af,
 		ASSERT(0);
 	}
 
-	if ((s = id_sock->sock) < 0)
-		if ((s = icmp_ping_new(c, id_sock, af, id)) < 0)
+	if (!(pingf = *id_sock))
+		if (!(pingf = icmp_ping_new(c, id_sock, af, id)))
 			return 1;
 
-	id_sock->ts = now->tv_sec;
+	pingf->ts = now->tv_sec;
 
-	if (sendto(s, pkt, plen, MSG_NOSIGNAL, &sa.sa, sl) < 0) {
+	if (sendto(pingf->sock, pkt, plen, MSG_NOSIGNAL, &sa.sa, sl) < 0) {
 		debug("%s: failed to relay request to socket: %s",
 		      pname, strerror(errno));
 	} else {
@@ -278,44 +288,21 @@ int icmp_tap_handler(const struct ctx *c, uint8_t pif, sa_family_t af,
 }
 
 /**
- * icmp_timer_one() - Handler for timed events related to a given identifier
- * @c:		Execution context
- * @id_sock:	Socket fd and activity timestamp
- * @now:	Current timestamp
- */
-static void icmp_timer_one(const struct ctx *c, struct icmp_id_sock *id_sock,
-			   const struct timespec *now)
-{
-	if (id_sock->sock < 0 || now->tv_sec - id_sock->ts <= ICMP_ECHO_TIMEOUT)
-		return;
-
-	icmp_ping_close(c, id_sock);
-}
-
-/**
- * icmp_timer() - Scan activity bitmap for identifiers with timed events
+ * icmp_ping_timer() - Handler for timed events related to a given flow
  * @c:		Execution context
+ * @flow:	flow table entry to check for timeout
  * @now:	Current timestamp
+ *
+ * Return: true if the flow is ready to free, false otherwise
  */
-void icmp_timer(const struct ctx *c, const struct timespec *now)
+bool icmp_ping_timer(const struct ctx *c, union flow *flow,
+		     const struct timespec *now)
 {
-	unsigned int i;
-
-	for (i = 0; i < ICMP_NUM_IDS; i++) {
-		icmp_timer_one(c, &icmp_id_map[V4][i], now);
-		icmp_timer_one(c, &icmp_id_map[V6][i], now);
-	}
-}
+	const struct icmp_ping_flow *pingf = &flow->ping;
 
-/**
- * icmp_init() - Initialise sequences in ID map to -1 (no sequence sent yet)
- */
-void icmp_init(void)
-{
-	unsigned i;
+	if (now->tv_sec - pingf->ts <= ICMP_ECHO_TIMEOUT)
+		return false;
 
-	for (i = 0; i < ICMP_NUM_IDS; i++) {
-		icmp_id_map[V4][i].seq = icmp_id_map[V6][i].seq = -1;
-		icmp_id_map[V4][i].sock = icmp_id_map[V6][i].sock = -1;
-	}
+	icmp_ping_close(c, pingf);
+	return true;
 }
diff --git a/icmp.h b/icmp.h
index 263101db..4f695da1 100644
--- a/icmp.h
+++ b/icmp.h
@@ -9,12 +9,12 @@
 #define ICMP_TIMER_INTERVAL		10000 /* ms */
 
 struct ctx;
+struct icmp_ping_flow;
 
 void icmp_sock_handler(const struct ctx *c, sa_family_t af, union epoll_ref ref);
 int icmp_tap_handler(const struct ctx *c, uint8_t pif, sa_family_t af,
 		     const void *saddr, const void *daddr,
 		     const struct pool *p, const struct timespec *now);
-void icmp_timer(const struct ctx *c, const struct timespec *now);
 void icmp_init(void);
 
 /**
diff --git a/icmp_flow.h b/icmp_flow.h
new file mode 100644
index 00000000..5a2eed9c
--- /dev/null
+++ b/icmp_flow.h
@@ -0,0 +1,31 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later
+ * Copyright Red Hat
+ * Author: David Gibson <david@gibson.dropbear.id.au>
+ *
+ * ICMP flow tracking data structures
+ */
+#ifndef ICMP_FLOW_H
+#define ICMP_FLOW_H
+
+/**
+ * struct icmp_ping_flow - Descriptor for a flow of ping requests/replies
+ * @f:		Generic flow information
+ * @seq:	Last sequence number sent to tap, host order, -1: not sent yet
+ * @sock:	"ping" socket
+ * @ts:		Last associated activity from tap, seconds
+ * @id:		ICMP id for the flow as seen by the guest
+ */
+struct icmp_ping_flow {
+	/* Must be first element */
+	struct flow_common f;
+
+	int seq;
+	int sock;
+	time_t ts;
+	uint16_t id;
+};
+
+bool icmp_ping_timer(const struct ctx *c, union flow *flow,
+		     const struct timespec *now);
+
+#endif /* ICMP_FLOW_H */
diff --git a/passt.c b/passt.c
index a061f2ba..0f794927 100644
--- a/passt.c
+++ b/passt.c
@@ -106,8 +106,6 @@ static void post_handler(struct ctx *c, const struct timespec *now)
 	CALL_PROTO_HANDLER(c, now, tcp, TCP);
 	/* NOLINTNEXTLINE(bugprone-branch-clone): intervals can be the same */
 	CALL_PROTO_HANDLER(c, now, udp, UDP);
-	/* NOLINTNEXTLINE(bugprone-branch-clone): intervals can be the same */
-	CALL_PROTO_HANDLER(c, now, icmp, ICMP);
 
 	flow_defer_handler(c, now);
 #undef CALL_PROTO_HANDLER
@@ -288,9 +286,6 @@ int main(int argc, char **argv)
 	if ((!c.no_udp && udp_init(&c)) || (!c.no_tcp && tcp_init(&c)))
 		exit(EXIT_FAILURE);
 
-	if (!c.no_icmp)
-		icmp_init();
-
 	proto_update_l2_buf(c.mac_guest, c.mac);
 
 	if (c.ifi4 && !c.no_dhcp)
-- 
@@ -106,8 +106,6 @@ static void post_handler(struct ctx *c, const struct timespec *now)
 	CALL_PROTO_HANDLER(c, now, tcp, TCP);
 	/* NOLINTNEXTLINE(bugprone-branch-clone): intervals can be the same */
 	CALL_PROTO_HANDLER(c, now, udp, UDP);
-	/* NOLINTNEXTLINE(bugprone-branch-clone): intervals can be the same */
-	CALL_PROTO_HANDLER(c, now, icmp, ICMP);
 
 	flow_defer_handler(c, now);
 #undef CALL_PROTO_HANDLER
@@ -288,9 +286,6 @@ int main(int argc, char **argv)
 	if ((!c.no_udp && udp_init(&c)) || (!c.no_tcp && tcp_init(&c)))
 		exit(EXIT_FAILURE);
 
-	if (!c.no_icmp)
-		icmp_init();
-
 	proto_update_l2_buf(c.mac_guest, c.mac);
 
 	if (c.ifi4 && !c.no_dhcp)
-- 
2.44.0


^ permalink raw reply related	[flat|nested] 12+ messages in thread

* [PATCH 2/3] icmp: Flow based error reporting
  2024-02-29  4:15 [PATCH 0/3] Basic integration of ICMP with flow table David Gibson
  2024-02-29  4:15 ` [PATCH 1/3] icmp: Store ping socket information in " David Gibson
@ 2024-02-29  4:15 ` David Gibson
  2024-03-07 23:26   ` Stefano Brivio
  2024-02-29  4:15 ` [PATCH 3/3] icmp: Use 'flowside' epoll references for ping sockets David Gibson
  2024-03-13 10:08 ` [PATCH 0/3] Basic integration of ICMP with flow table Stefano Brivio
  3 siblings, 1 reply; 12+ messages in thread
From: David Gibson @ 2024-02-29  4:15 UTC (permalink / raw)
  To: Stefano Brivio, passt-dev; +Cc: David Gibson

Use flow_dbg() and flow_err() helpers to generate flow-linked error
messages in most places.  Make a few small improvements to the messages
while we're at it.  This allows us to avoid the awkward 'pname' variables
since whether we're dealing with ICMP or ICMPv6 is already built into the
flow type which these helpers include.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
---
 icmp.c | 31 ++++++++++++++-----------------
 1 file changed, 14 insertions(+), 17 deletions(-)

diff --git a/icmp.c b/icmp.c
index 1caf791d..63adafcf 100644
--- a/icmp.c
+++ b/icmp.c
@@ -61,7 +61,6 @@ void icmp_sock_handler(const struct ctx *c, sa_family_t af, union epoll_ref ref)
 {
 	struct icmp_ping_flow *pingf = af == AF_INET
 		? icmp_id_map[V4][ref.icmp.id] : icmp_id_map[V6][ref.icmp.id];
-	const char *const pname = af == AF_INET ? "ICMP" : "ICMPv6";
 	union sockaddr_inany sr;
 	socklen_t sl = sizeof(sr);
 	char buf[USHRT_MAX];
@@ -75,8 +74,7 @@ void icmp_sock_handler(const struct ctx *c, sa_family_t af, union epoll_ref ref)
 
 	n = recvfrom(ref.fd, buf, sizeof(buf), 0, &sr.sa, &sl);
 	if (n < 0) {
-		warn("%s: recvfrom() error on ping socket: %s",
-		     pname, strerror(errno));
+		flow_err(pingf, "recvfrom() error: %s", strerror(errno));
 		return;
 	}
 	if (sr.sa_family != af)
@@ -113,8 +111,9 @@ void icmp_sock_handler(const struct ctx *c, sa_family_t af, union epoll_ref ref)
 		pingf->seq = seq;
 	}
 
-	debug("%s: echo reply to tap, ID: %"PRIu16", seq: %"PRIu16, pname,
-	      ref.icmp.id, seq);
+	flow_dbg(pingf, "echo reply to tap, ID: %"PRIu16", seq: %"PRIu16,
+		 ref.icmp.id, seq);
+
 	if (af == AF_INET)
 		tap_icmp4_send(c, sr.sa4.sin_addr, tap_ip4_daddr(c), buf, n);
 	else if (af == AF_INET6)
@@ -123,7 +122,7 @@ void icmp_sock_handler(const struct ctx *c, sa_family_t af, union epoll_ref ref)
 	return;
 
 unexpected:
-	warn("%s: Unexpected packet on ping socket", pname);
+	flow_err(pingf, "Unexpected packet on ping socket");
 }
 
 /**
@@ -158,7 +157,6 @@ static struct icmp_ping_flow *icmp_ping_new(const struct ctx *c,
 					    struct icmp_ping_flow **id_sock,
 					    sa_family_t af, uint16_t id)
 {
-	const char *const pname = af == AF_INET ? "ICMP" : "ICMPv6";
 	uint8_t flowtype = af == AF_INET ? FLOW_PING4 : FLOW_PING6;
 	union icmp_epoll_ref iref = { .id = id };
 	union flow *flow = flow_alloc();
@@ -195,9 +193,9 @@ static struct icmp_ping_flow *icmp_ping_new(const struct ctx *c,
 	if (pingf->sock > FD_REF_MAX)
 		goto cancel;
 
-	*id_sock = pingf;
+	flow_dbg(pingf, "new socket %i for echo ID %"PRIu16, pingf->sock, id);
 
-	debug("%s: new socket %i for echo ID %"PRIu16, pname, pingf->sock, id);
+	*id_sock = pingf;
 
 	return pingf;
 
@@ -222,7 +220,6 @@ int icmp_tap_handler(const struct ctx *c, uint8_t pif, sa_family_t af,
 		     const void *saddr, const void *daddr,
 		     const struct pool *p, const struct timespec *now)
 {
-	const char *const pname = af == AF_INET ? "ICMP" : "ICMPv6";
 	union sockaddr_inany sa = { .sa_family = af };
 	const socklen_t sl = af == AF_INET ? sizeof(sa.sa4) : sizeof(sa.sa6);
 	struct icmp_ping_flow *pingf, **id_sock;
@@ -276,13 +273,13 @@ int icmp_tap_handler(const struct ctx *c, uint8_t pif, sa_family_t af,
 
 	pingf->ts = now->tv_sec;
 
-	if (sendto(pingf->sock, pkt, plen, MSG_NOSIGNAL, &sa.sa, sl) < 0) {
-		debug("%s: failed to relay request to socket: %s",
-		      pname, strerror(errno));
-	} else {
-		debug("%s: echo request to socket, ID: %"PRIu16", seq: %"PRIu16,
-		      pname, id, seq);
-	}
+	if (sendto(pingf->sock, pkt, plen, MSG_NOSIGNAL, &sa.sa, sl) < 0)
+		flow_dbg(pingf, "failed to relay request to socket: %s",
+			 strerror(errno));
+	else
+		flow_dbg(pingf,
+			 "echo request to socket, ID: %"PRIu16", seq: %"PRIu16,
+			 id, seq);
 
 	return 1;
 }
-- 
@@ -61,7 +61,6 @@ void icmp_sock_handler(const struct ctx *c, sa_family_t af, union epoll_ref ref)
 {
 	struct icmp_ping_flow *pingf = af == AF_INET
 		? icmp_id_map[V4][ref.icmp.id] : icmp_id_map[V6][ref.icmp.id];
-	const char *const pname = af == AF_INET ? "ICMP" : "ICMPv6";
 	union sockaddr_inany sr;
 	socklen_t sl = sizeof(sr);
 	char buf[USHRT_MAX];
@@ -75,8 +74,7 @@ void icmp_sock_handler(const struct ctx *c, sa_family_t af, union epoll_ref ref)
 
 	n = recvfrom(ref.fd, buf, sizeof(buf), 0, &sr.sa, &sl);
 	if (n < 0) {
-		warn("%s: recvfrom() error on ping socket: %s",
-		     pname, strerror(errno));
+		flow_err(pingf, "recvfrom() error: %s", strerror(errno));
 		return;
 	}
 	if (sr.sa_family != af)
@@ -113,8 +111,9 @@ void icmp_sock_handler(const struct ctx *c, sa_family_t af, union epoll_ref ref)
 		pingf->seq = seq;
 	}
 
-	debug("%s: echo reply to tap, ID: %"PRIu16", seq: %"PRIu16, pname,
-	      ref.icmp.id, seq);
+	flow_dbg(pingf, "echo reply to tap, ID: %"PRIu16", seq: %"PRIu16,
+		 ref.icmp.id, seq);
+
 	if (af == AF_INET)
 		tap_icmp4_send(c, sr.sa4.sin_addr, tap_ip4_daddr(c), buf, n);
 	else if (af == AF_INET6)
@@ -123,7 +122,7 @@ void icmp_sock_handler(const struct ctx *c, sa_family_t af, union epoll_ref ref)
 	return;
 
 unexpected:
-	warn("%s: Unexpected packet on ping socket", pname);
+	flow_err(pingf, "Unexpected packet on ping socket");
 }
 
 /**
@@ -158,7 +157,6 @@ static struct icmp_ping_flow *icmp_ping_new(const struct ctx *c,
 					    struct icmp_ping_flow **id_sock,
 					    sa_family_t af, uint16_t id)
 {
-	const char *const pname = af == AF_INET ? "ICMP" : "ICMPv6";
 	uint8_t flowtype = af == AF_INET ? FLOW_PING4 : FLOW_PING6;
 	union icmp_epoll_ref iref = { .id = id };
 	union flow *flow = flow_alloc();
@@ -195,9 +193,9 @@ static struct icmp_ping_flow *icmp_ping_new(const struct ctx *c,
 	if (pingf->sock > FD_REF_MAX)
 		goto cancel;
 
-	*id_sock = pingf;
+	flow_dbg(pingf, "new socket %i for echo ID %"PRIu16, pingf->sock, id);
 
-	debug("%s: new socket %i for echo ID %"PRIu16, pname, pingf->sock, id);
+	*id_sock = pingf;
 
 	return pingf;
 
@@ -222,7 +220,6 @@ int icmp_tap_handler(const struct ctx *c, uint8_t pif, sa_family_t af,
 		     const void *saddr, const void *daddr,
 		     const struct pool *p, const struct timespec *now)
 {
-	const char *const pname = af == AF_INET ? "ICMP" : "ICMPv6";
 	union sockaddr_inany sa = { .sa_family = af };
 	const socklen_t sl = af == AF_INET ? sizeof(sa.sa4) : sizeof(sa.sa6);
 	struct icmp_ping_flow *pingf, **id_sock;
@@ -276,13 +273,13 @@ int icmp_tap_handler(const struct ctx *c, uint8_t pif, sa_family_t af,
 
 	pingf->ts = now->tv_sec;
 
-	if (sendto(pingf->sock, pkt, plen, MSG_NOSIGNAL, &sa.sa, sl) < 0) {
-		debug("%s: failed to relay request to socket: %s",
-		      pname, strerror(errno));
-	} else {
-		debug("%s: echo request to socket, ID: %"PRIu16", seq: %"PRIu16,
-		      pname, id, seq);
-	}
+	if (sendto(pingf->sock, pkt, plen, MSG_NOSIGNAL, &sa.sa, sl) < 0)
+		flow_dbg(pingf, "failed to relay request to socket: %s",
+			 strerror(errno));
+	else
+		flow_dbg(pingf,
+			 "echo request to socket, ID: %"PRIu16", seq: %"PRIu16,
+			 id, seq);
 
 	return 1;
 }
-- 
2.44.0


^ permalink raw reply related	[flat|nested] 12+ messages in thread

* [PATCH 3/3] icmp: Use 'flowside' epoll references for ping sockets
  2024-02-29  4:15 [PATCH 0/3] Basic integration of ICMP with flow table David Gibson
  2024-02-29  4:15 ` [PATCH 1/3] icmp: Store ping socket information in " David Gibson
  2024-02-29  4:15 ` [PATCH 2/3] icmp: Flow based error reporting David Gibson
@ 2024-02-29  4:15 ` David Gibson
  2024-03-13 10:08 ` [PATCH 0/3] Basic integration of ICMP with flow table Stefano Brivio
  3 siblings, 0 replies; 12+ messages in thread
From: David Gibson @ 2024-02-29  4:15 UTC (permalink / raw)
  To: Stefano Brivio, passt-dev; +Cc: David Gibson

Currently ping sockets use a custom epoll reference type which includes
the ICMP id.  However, now that we have entries in the flow table for
ping flows, finding that is sufficient to get everything else we want,
including the id.  Therefore remove the icmp_epoll_ref type and use the
general 'flowside' field for ping sockets.

Having done this we no longer need separate EPOLL_TYPE_ICMP and
EPOLL_TYPE_ICMPV6 reference types, because we can easily determine
which case we have from the flow type. Merge both types into
EPOLL_TYPE_PING.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
---
 icmp.c  | 34 +++++++++++++++++-----------------
 icmp.h  | 13 +------------
 passt.c | 10 +++-------
 passt.h |  7 ++-----
 util.c  |  4 +---
 5 files changed, 24 insertions(+), 44 deletions(-)

diff --git a/icmp.c b/icmp.c
index 63adafcf..ec28e419 100644
--- a/icmp.c
+++ b/icmp.c
@@ -48,19 +48,19 @@
 #define	SOCKSIDE	0
 #define	TAPSIDE		1
 
+#define PINGF(idx)		(&(FLOW(idx)->ping))
+
 /* Indexed by ICMP echo identifier */
 static struct icmp_ping_flow *icmp_id_map[IP_VERSIONS][ICMP_NUM_IDS];
 
 /**
  * icmp_sock_handler() - Handle new data from ICMP or ICMPv6 socket
  * @c:		Execution context
- * @af:		Address family (AF_INET or AF_INET6)
  * @ref:	epoll reference
  */
-void icmp_sock_handler(const struct ctx *c, sa_family_t af, union epoll_ref ref)
+void icmp_sock_handler(const struct ctx *c, union epoll_ref ref)
 {
-	struct icmp_ping_flow *pingf = af == AF_INET
-		? icmp_id_map[V4][ref.icmp.id] : icmp_id_map[V6][ref.icmp.id];
+	struct icmp_ping_flow *pingf = PINGF(ref.flowside.flow);
 	union sockaddr_inany sr;
 	socklen_t sl = sizeof(sr);
 	char buf[USHRT_MAX];
@@ -77,27 +77,26 @@ void icmp_sock_handler(const struct ctx *c, sa_family_t af, union epoll_ref ref)
 		flow_err(pingf, "recvfrom() error: %s", strerror(errno));
 		return;
 	}
-	if (sr.sa_family != af)
-		goto unexpected;
 
-	if (af == AF_INET) {
+	if (pingf->f.type == FLOW_PING4) {
 		struct icmphdr *ih4 = (struct icmphdr *)buf;
 
-		if ((size_t)n < sizeof(*ih4) || ih4->type != ICMP_ECHOREPLY)
+		if (sr.sa_family != AF_INET || (size_t)n < sizeof(*ih4) ||
+		    ih4->type != ICMP_ECHOREPLY)
 			goto unexpected;
 
 		/* Adjust packet back to guest-side ID */
-		ih4->un.echo.id = htons(ref.icmp.id);
+		ih4->un.echo.id = htons(pingf->id);
 		seq = ntohs(ih4->un.echo.sequence);
-	} else if (af == AF_INET6) {
+	} else if (pingf->f.type == FLOW_PING6) {
 		struct icmp6hdr *ih6 = (struct icmp6hdr *)buf;
 
-		if ((size_t)n < sizeof(*ih6) ||
+		if (sr.sa_family != AF_INET6 || (size_t)n < sizeof(*ih6) ||
 		    ih6->icmp6_type != ICMPV6_ECHO_REPLY)
 			goto unexpected;
 
 		/* Adjust packet back to guest-side ID */
-		ih6->icmp6_identifier = htons(ref.icmp.id);
+		ih6->icmp6_identifier = htons(pingf->id);
 		seq = ntohs(ih6->icmp6_sequence);
 	} else {
 		ASSERT(0);
@@ -112,11 +111,11 @@ void icmp_sock_handler(const struct ctx *c, sa_family_t af, union epoll_ref ref)
 	}
 
 	flow_dbg(pingf, "echo reply to tap, ID: %"PRIu16", seq: %"PRIu16,
-		 ref.icmp.id, seq);
+		 pingf->id, seq);
 
-	if (af == AF_INET)
+	if (pingf->f.type == FLOW_PING4)
 		tap_icmp4_send(c, sr.sa4.sin_addr, tap_ip4_daddr(c), buf, n);
-	else if (af == AF_INET6)
+	else if (pingf->f.type == FLOW_PING6)
 		tap_icmp6_send(c, &sr.sa6.sin6_addr,
 			       tap_ip6_daddr(c, &sr.sa6.sin6_addr), buf, n);
 	return;
@@ -158,7 +157,7 @@ static struct icmp_ping_flow *icmp_ping_new(const struct ctx *c,
 					    sa_family_t af, uint16_t id)
 {
 	uint8_t flowtype = af == AF_INET ? FLOW_PING4 : FLOW_PING6;
-	union icmp_epoll_ref iref = { .id = id };
+	union epoll_ref ref = { .type = EPOLL_TYPE_PING };
 	union flow *flow = flow_alloc();
 	struct icmp_ping_flow *pingf;
 	const void *bind_addr;
@@ -180,8 +179,9 @@ static struct icmp_ping_flow *icmp_ping_new(const struct ctx *c,
 		bind_if = c->ip6.ifname_out;
 	}
 
+	ref.flowside = FLOW_SIDX(flow, SOCKSIDE);
 	pingf->sock = sock_l4(c, af, flow_proto[flowtype], bind_addr, bind_if,
-			      0, iref.u32);
+			      0, ref.data);
 
 	if (pingf->sock < 0) {
 		warn("Cannot open \"ping\" socket. You might need to:");
diff --git a/icmp.h b/icmp.h
index 4f695da1..5ce22b5e 100644
--- a/icmp.h
+++ b/icmp.h
@@ -11,23 +11,12 @@
 struct ctx;
 struct icmp_ping_flow;
 
-void icmp_sock_handler(const struct ctx *c, sa_family_t af, union epoll_ref ref);
+void icmp_sock_handler(const struct ctx *c, union epoll_ref ref);
 int icmp_tap_handler(const struct ctx *c, uint8_t pif, sa_family_t af,
 		     const void *saddr, const void *daddr,
 		     const struct pool *p, const struct timespec *now);
 void icmp_init(void);
 
-/**
- * union icmp_epoll_ref - epoll reference portion for ICMP tracking
- * @v6:			Set for IPv6 sockets or connections
- * @u32:		Opaque u32 value of reference
- * @id:			Associated echo identifier, needed if bind() fails
- */
-union icmp_epoll_ref {
-	uint16_t id;
-	uint32_t u32;
-};
-
 /**
  * struct icmp_ctx - Execution context for ICMP routines
  * @timer_run:		Timestamp of most recent timer run
diff --git a/passt.c b/passt.c
index 0f794927..f4306483 100644
--- a/passt.c
+++ b/passt.c
@@ -66,8 +66,7 @@ char *epoll_type_str[] = {
 	[EPOLL_TYPE_TCP_LISTEN]		= "listening TCP socket",
 	[EPOLL_TYPE_TCP_TIMER]		= "TCP timer",
 	[EPOLL_TYPE_UDP]		= "UDP socket",
-	[EPOLL_TYPE_ICMP]		= "ICMP socket",
-	[EPOLL_TYPE_ICMPV6]		= "ICMPv6 socket",
+	[EPOLL_TYPE_PING]	= "ICMP/ICMPv6 ping socket",
 	[EPOLL_TYPE_NSQUIT_INOTIFY]	= "namespace inotify watch",
 	[EPOLL_TYPE_NSQUIT_TIMER]	= "namespace timer watch",
 	[EPOLL_TYPE_TAP_PASTA]		= "/dev/net/tun device",
@@ -377,11 +376,8 @@ loop:
 		case EPOLL_TYPE_UDP:
 			udp_sock_handler(&c, ref, eventmask, &now);
 			break;
-		case EPOLL_TYPE_ICMP:
-			icmp_sock_handler(&c, AF_INET, ref);
-			break;
-		case EPOLL_TYPE_ICMPV6:
-			icmp_sock_handler(&c, AF_INET6, ref);
+		case EPOLL_TYPE_PING:
+			icmp_sock_handler(&c, ref);
 			break;
 		default:
 			/* Can't happen */
diff --git a/passt.h b/passt.h
index e6d43585..76026f09 100644
--- a/passt.h
+++ b/passt.h
@@ -59,10 +59,8 @@ enum epoll_type {
 	EPOLL_TYPE_TCP_TIMER,
 	/* UDP sockets */
 	EPOLL_TYPE_UDP,
-	/* IPv4 ICMP sockets */
-	EPOLL_TYPE_ICMP,
-	/* ICMPv6 sockets */
-	EPOLL_TYPE_ICMPV6,
+	/* ICMP/ICMPv6 ping sockets */
+	EPOLL_TYPE_PING,
 	/* inotify fd watching for end of netns (pasta) */
 	EPOLL_TYPE_NSQUIT_INOTIFY,
 	/* timer fd watching for end of netns, fallback for inotify (pasta) */
@@ -100,7 +98,6 @@ union epoll_ref {
 			flow_sidx_t flowside;
 			union tcp_listen_epoll_ref tcp_listen;
 			union udp_epoll_ref udp;
-			union icmp_epoll_ref icmp;
 			uint32_t data;
 			int nsdir_fd;
 		};
diff --git a/util.c b/util.c
index 03e393f6..5f18bd61 100644
--- a/util.c
+++ b/util.c
@@ -127,10 +127,8 @@ int sock_l4(const struct ctx *c, sa_family_t af, uint8_t proto,
 		ref.type = EPOLL_TYPE_UDP;
 		break;
 	case IPPROTO_ICMP:
-		ref.type = EPOLL_TYPE_ICMP;
-		break;
 	case IPPROTO_ICMPV6:
-		ref.type = EPOLL_TYPE_ICMPV6;
+		ref.type = EPOLL_TYPE_PING;
 		break;
 	default:
 		return -EPFNOSUPPORT;	/* Not implemented. */
-- 
@@ -127,10 +127,8 @@ int sock_l4(const struct ctx *c, sa_family_t af, uint8_t proto,
 		ref.type = EPOLL_TYPE_UDP;
 		break;
 	case IPPROTO_ICMP:
-		ref.type = EPOLL_TYPE_ICMP;
-		break;
 	case IPPROTO_ICMPV6:
-		ref.type = EPOLL_TYPE_ICMPV6;
+		ref.type = EPOLL_TYPE_PING;
 		break;
 	default:
 		return -EPFNOSUPPORT;	/* Not implemented. */
-- 
2.44.0


^ permalink raw reply related	[flat|nested] 12+ messages in thread

* Re: [PATCH 1/3] icmp: Store ping socket information in flow table
  2024-02-29  4:15 ` [PATCH 1/3] icmp: Store ping socket information in " David Gibson
@ 2024-03-07  8:53   ` Stefano Brivio
  2024-03-07 10:24     ` David Gibson
  0 siblings, 1 reply; 12+ messages in thread
From: Stefano Brivio @ 2024-03-07  8:53 UTC (permalink / raw)
  To: David Gibson; +Cc: passt-dev

Apologies for the delay, I'm still reviewing 3/3, but I have a question
meanwhile:

On Thu, 29 Feb 2024 15:15:32 +1100
David Gibson <david@gibson.dropbear.id.au> wrote:

> Currently icmp_id_map[][] stores information about ping sockets in a
> bespoke structure.  Move the same information into new types of flow
> in the flow table.  To match that change, replace the existing ICMP
> timer with a flow-based timer for expiring ping sockets.  This has the
> advantage that we only need to scan the active flows, not all possible
> ids.
> 
> We convert icmp_id_map[][] to point to the flow table entries, rather
> than containing its own information.  We do still use that array for
> locating the right ping flows, rather than using a "flow native" form
> of lookup for the time being.
> 
> Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
> ---
>  Makefile     |   6 +--
>  flow.c       |   9 ++++
>  flow.h       |   4 ++
>  flow_table.h |   2 +
>  icmp.c       | 143 +++++++++++++++++++++++----------------------------
>  icmp.h       |   2 +-
>  icmp_flow.h  |  31 +++++++++++
>  passt.c      |   5 --
>  8 files changed, 115 insertions(+), 87 deletions(-)
>  create mode 100644 icmp_flow.h
> 
> diff --git a/Makefile b/Makefile
> index 2d6a5155..47fc5448 100644
> --- a/Makefile
> +++ b/Makefile
> @@ -54,9 +54,9 @@ SRCS = $(PASST_SRCS) $(QRAP_SRCS)
>  MANPAGES = passt.1 pasta.1 qrap.1
>  
>  PASST_HEADERS = arch.h arp.h checksum.h conf.h dhcp.h dhcpv6.h flow.h fwd.h \
> -	flow_table.h icmp.h inany.h iov.h isolation.h lineread.h log.h ndp.h \
> -	netlink.h packet.h passt.h pasta.h pcap.h pif.h siphash.h tap.h tcp.h \
> -	tcp_conn.h tcp_splice.h udp.h util.h
> +	flow_table.h icmp.h icmp_flow.h inany.h iov.h isolation.h lineread.h \
> +	log.h ndp.h netlink.h packet.h passt.h pasta.h pcap.h pif.h siphash.h \
> +	tap.h tcp.h tcp_conn.h tcp_splice.h udp.h util.h
>  HEADERS = $(PASST_HEADERS) seccomp.h
>  
>  C := \#include <linux/tcp.h>\nstruct tcp_info x = { .tcpi_snd_wnd = 0 };
> diff --git a/flow.c b/flow.c
> index d7974d59..5835d6c0 100644
> --- a/flow.c
> +++ b/flow.c
> @@ -21,6 +21,8 @@ const char *flow_type_str[] = {
>  	[FLOW_TYPE_NONE]	= "<none>",
>  	[FLOW_TCP]		= "TCP connection",
>  	[FLOW_TCP_SPLICE]	= "TCP connection (spliced)",
> +	[FLOW_PING4]		= "ICMP ping sequence",
> +	[FLOW_PING6]		= "ICMPv6 ping sequence",
>  };
>  static_assert(ARRAY_SIZE(flow_type_str) == FLOW_NUM_TYPES,
>  	      "flow_type_str[] doesn't match enum flow_type");
> @@ -28,6 +30,8 @@ static_assert(ARRAY_SIZE(flow_type_str) == FLOW_NUM_TYPES,
>  const uint8_t flow_proto[] = {
>  	[FLOW_TCP]		= IPPROTO_TCP,
>  	[FLOW_TCP_SPLICE]	= IPPROTO_TCP,
> +	[FLOW_PING4]		= IPPROTO_ICMP,
> +	[FLOW_PING6]		= IPPROTO_ICMPV6,
>  };
>  static_assert(ARRAY_SIZE(flow_proto) == FLOW_NUM_TYPES,
>  	      "flow_proto[] doesn't match enum flow_type");
> @@ -294,6 +298,11 @@ void flow_defer_handler(const struct ctx *c, const struct timespec *now)
>  			if (!closed && timer)
>  				tcp_splice_timer(c, flow);
>  			break;
> +		case FLOW_PING4:
> +		case FLOW_PING6:
> +			if (timer)
> +				closed = icmp_ping_timer(c, flow, now);
> +			break;
>  		default:
>  			/* Assume other flow types don't need any handling */
>  			;
> diff --git a/flow.h b/flow.h
> index 8b66751b..c943c441 100644
> --- a/flow.h
> +++ b/flow.h
> @@ -19,6 +19,10 @@ enum flow_type {
>  	FLOW_TCP,
>  	/* A TCP connection between a host socket and ns socket */
>  	FLOW_TCP_SPLICE,
> +	/* ICMP echo requests from guest to host and matching replies back */
> +	FLOW_PING4,
> +	/* ICMPv6 echo requests from guest to host and matching replies back */
> +	FLOW_PING6,
>  
>  	FLOW_NUM_TYPES,
>  };
> diff --git a/flow_table.h b/flow_table.h
> index eecf8844..b7e5529a 100644
> --- a/flow_table.h
> +++ b/flow_table.h
> @@ -8,6 +8,7 @@
>  #define FLOW_TABLE_H
>  
>  #include "tcp_conn.h"
> +#include "icmp_flow.h"
>  
>  /**
>   * struct flow_free_cluster - Information about a cluster of free entries
> @@ -33,6 +34,7 @@ union flow {
>  	struct flow_free_cluster free;
>  	struct tcp_tap_conn tcp;
>  	struct tcp_splice_conn tcp_splice;
> +	struct icmp_ping_flow ping;
>  };
>  
>  /* Global Flow Table */
> diff --git a/icmp.c b/icmp.c
> index fb2fcafc..1caf791d 100644
> --- a/icmp.c
> +++ b/icmp.c
> @@ -39,24 +39,17 @@
>  #include "siphash.h"
>  #include "inany.h"
>  #include "icmp.h"
> +#include "flow_table.h"
>  
>  #define ICMP_ECHO_TIMEOUT	60 /* s, timeout for ICMP socket activity */
>  #define ICMP_NUM_IDS		(1U << 16)
>  
> -/**
> - * struct icmp_id_sock - Tracking information for single ICMP echo identifier
> - * @sock:	Bound socket for identifier
> - * @seq:	Last sequence number sent to tap, host order, -1: not sent yet
> - * @ts:		Last associated activity from tap, seconds
> - */
> -struct icmp_id_sock {
> -	int sock;
> -	int seq;
> -	time_t ts;
> -};
> +/* Sides of a flow as we use them for ping streams */
> +#define	SOCKSIDE	0
> +#define	TAPSIDE		1
>  
>  /* Indexed by ICMP echo identifier */
> -static struct icmp_id_sock icmp_id_map[IP_VERSIONS][ICMP_NUM_IDS];
> +static struct icmp_ping_flow *icmp_id_map[IP_VERSIONS][ICMP_NUM_IDS];
>  
>  /**
>   * icmp_sock_handler() - Handle new data from ICMP or ICMPv6 socket
> @@ -66,8 +59,8 @@ static struct icmp_id_sock icmp_id_map[IP_VERSIONS][ICMP_NUM_IDS];
>   */
>  void icmp_sock_handler(const struct ctx *c, sa_family_t af, union epoll_ref ref)
>  {
> -	struct icmp_id_sock *const id_sock = af == AF_INET
> -		? &icmp_id_map[V4][ref.icmp.id] : &icmp_id_map[V6][ref.icmp.id];
> +	struct icmp_ping_flow *pingf = af == AF_INET
> +		? icmp_id_map[V4][ref.icmp.id] : icmp_id_map[V6][ref.icmp.id];
>  	const char *const pname = af == AF_INET ? "ICMP" : "ICMPv6";
>  	union sockaddr_inany sr;
>  	socklen_t sl = sizeof(sr);
> @@ -78,6 +71,8 @@ void icmp_sock_handler(const struct ctx *c, sa_family_t af, union epoll_ref ref)
>  	if (c->no_icmp)
>  		return;
>  
> +	ASSERT(pingf);
> +
>  	n = recvfrom(ref.fd, buf, sizeof(buf), 0, &sr.sa, &sl);
>  	if (n < 0) {
>  		warn("%s: recvfrom() error on ping socket: %s",
> @@ -112,10 +107,10 @@ void icmp_sock_handler(const struct ctx *c, sa_family_t af, union epoll_ref ref)
>  
>  	/* In PASTA mode, we'll get any reply we send, discard them. */
>  	if (c->mode == MODE_PASTA) {
> -		if (id_sock->seq == seq)
> +		if (pingf->seq == seq)
>  			return;
>  
> -		id_sock->seq = seq;
> +		pingf->seq = seq;
>  	}
>  
>  	debug("%s: echo reply to tap, ID: %"PRIu16", seq: %"PRIu16, pname,
> @@ -132,16 +127,22 @@ unexpected:
>  }
>  
>  /**
> - * icmp_ping_close() - Close and clean up a ping socket
> + * icmp_ping_close() - Close and clean up a ping flow
>   * @c:		Execution context
> - * @id_sock:	Socket number and other info
> + * @pingf:	ping flow entry to close
>   */
> -static void icmp_ping_close(const struct ctx *c, struct icmp_id_sock *id_sock)
> +static void icmp_ping_close(const struct ctx *c,
> +			    const struct icmp_ping_flow *pingf)
>  {
> -	epoll_ctl(c->epollfd, EPOLL_CTL_DEL, id_sock->sock, NULL);
> -	close(id_sock->sock);
> -	id_sock->sock = -1;
> -	id_sock->seq = -1;
> +	uint16_t id = pingf->id;
> +
> +	epoll_ctl(c->epollfd, EPOLL_CTL_DEL, pingf->sock, NULL);
> +	close(pingf->sock);
> +
> +	if (pingf->f.type == FLOW_PING4)
> +		icmp_id_map[V4][id] = NULL;
> +	else
> +		icmp_id_map[V6][id] = NULL;
>  }
>  
>  /**
> @@ -151,17 +152,27 @@ static void icmp_ping_close(const struct ctx *c, struct icmp_id_sock *id_sock)
>   * @af:		Address family, AF_INET or AF_INET6
>   * @id:		ICMP id for the new socket
>   *
> - * Return: Newly opened ping socket fd, or -1 on failure
> + * Return: Newly opened ping flow, or NULL on failure
>   */
> -static int icmp_ping_new(const struct ctx *c, struct icmp_id_sock *id_sock,
> -			 sa_family_t af, uint16_t id)
> +static struct icmp_ping_flow *icmp_ping_new(const struct ctx *c,
> +					    struct icmp_ping_flow **id_sock,

I'm not quite sure why we still need id_sock passed as parameter, and
what it's supposed to contain (you haven't updated the function
comment).

Now that all the information is encapsulated in the flow, and you
return the new flow, with a trivial change in icmp_tap_handler(),
couldn't we just drop id_sock here?

-- 
Stefano


^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH 1/3] icmp: Store ping socket information in flow table
  2024-03-07  8:53   ` Stefano Brivio
@ 2024-03-07 10:24     ` David Gibson
  2024-03-07 23:25       ` Stefano Brivio
  0 siblings, 1 reply; 12+ messages in thread
From: David Gibson @ 2024-03-07 10:24 UTC (permalink / raw)
  To: Stefano Brivio; +Cc: passt-dev

[-- Attachment #1: Type: text/plain, Size: 9683 bytes --]

On Thu, Mar 07, 2024 at 09:53:15AM +0100, Stefano Brivio wrote:
> Apologies for the delay, I'm still reviewing 3/3, but I have a question
> meanwhile:
> 
> On Thu, 29 Feb 2024 15:15:32 +1100
> David Gibson <david@gibson.dropbear.id.au> wrote:
> 
> > Currently icmp_id_map[][] stores information about ping sockets in a
> > bespoke structure.  Move the same information into new types of flow
> > in the flow table.  To match that change, replace the existing ICMP
> > timer with a flow-based timer for expiring ping sockets.  This has the
> > advantage that we only need to scan the active flows, not all possible
> > ids.
> > 
> > We convert icmp_id_map[][] to point to the flow table entries, rather
> > than containing its own information.  We do still use that array for
> > locating the right ping flows, rather than using a "flow native" form
> > of lookup for the time being.
> > 
> > Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
> > ---
> >  Makefile     |   6 +--
> >  flow.c       |   9 ++++
> >  flow.h       |   4 ++
> >  flow_table.h |   2 +
> >  icmp.c       | 143 +++++++++++++++++++++++----------------------------
> >  icmp.h       |   2 +-
> >  icmp_flow.h  |  31 +++++++++++
> >  passt.c      |   5 --
> >  8 files changed, 115 insertions(+), 87 deletions(-)
> >  create mode 100644 icmp_flow.h
> > 
> > diff --git a/Makefile b/Makefile
> > index 2d6a5155..47fc5448 100644
> > --- a/Makefile
> > +++ b/Makefile
> > @@ -54,9 +54,9 @@ SRCS = $(PASST_SRCS) $(QRAP_SRCS)
> >  MANPAGES = passt.1 pasta.1 qrap.1
> >  
> >  PASST_HEADERS = arch.h arp.h checksum.h conf.h dhcp.h dhcpv6.h flow.h fwd.h \
> > -	flow_table.h icmp.h inany.h iov.h isolation.h lineread.h log.h ndp.h \
> > -	netlink.h packet.h passt.h pasta.h pcap.h pif.h siphash.h tap.h tcp.h \
> > -	tcp_conn.h tcp_splice.h udp.h util.h
> > +	flow_table.h icmp.h icmp_flow.h inany.h iov.h isolation.h lineread.h \
> > +	log.h ndp.h netlink.h packet.h passt.h pasta.h pcap.h pif.h siphash.h \
> > +	tap.h tcp.h tcp_conn.h tcp_splice.h udp.h util.h
> >  HEADERS = $(PASST_HEADERS) seccomp.h
> >  
> >  C := \#include <linux/tcp.h>\nstruct tcp_info x = { .tcpi_snd_wnd = 0 };
> > diff --git a/flow.c b/flow.c
> > index d7974d59..5835d6c0 100644
> > --- a/flow.c
> > +++ b/flow.c
> > @@ -21,6 +21,8 @@ const char *flow_type_str[] = {
> >  	[FLOW_TYPE_NONE]	= "<none>",
> >  	[FLOW_TCP]		= "TCP connection",
> >  	[FLOW_TCP_SPLICE]	= "TCP connection (spliced)",
> > +	[FLOW_PING4]		= "ICMP ping sequence",
> > +	[FLOW_PING6]		= "ICMPv6 ping sequence",
> >  };
> >  static_assert(ARRAY_SIZE(flow_type_str) == FLOW_NUM_TYPES,
> >  	      "flow_type_str[] doesn't match enum flow_type");
> > @@ -28,6 +30,8 @@ static_assert(ARRAY_SIZE(flow_type_str) == FLOW_NUM_TYPES,
> >  const uint8_t flow_proto[] = {
> >  	[FLOW_TCP]		= IPPROTO_TCP,
> >  	[FLOW_TCP_SPLICE]	= IPPROTO_TCP,
> > +	[FLOW_PING4]		= IPPROTO_ICMP,
> > +	[FLOW_PING6]		= IPPROTO_ICMPV6,
> >  };
> >  static_assert(ARRAY_SIZE(flow_proto) == FLOW_NUM_TYPES,
> >  	      "flow_proto[] doesn't match enum flow_type");
> > @@ -294,6 +298,11 @@ void flow_defer_handler(const struct ctx *c, const struct timespec *now)
> >  			if (!closed && timer)
> >  				tcp_splice_timer(c, flow);
> >  			break;
> > +		case FLOW_PING4:
> > +		case FLOW_PING6:
> > +			if (timer)
> > +				closed = icmp_ping_timer(c, flow, now);
> > +			break;
> >  		default:
> >  			/* Assume other flow types don't need any handling */
> >  			;
> > diff --git a/flow.h b/flow.h
> > index 8b66751b..c943c441 100644
> > --- a/flow.h
> > +++ b/flow.h
> > @@ -19,6 +19,10 @@ enum flow_type {
> >  	FLOW_TCP,
> >  	/* A TCP connection between a host socket and ns socket */
> >  	FLOW_TCP_SPLICE,
> > +	/* ICMP echo requests from guest to host and matching replies back */
> > +	FLOW_PING4,
> > +	/* ICMPv6 echo requests from guest to host and matching replies back */
> > +	FLOW_PING6,
> >  
> >  	FLOW_NUM_TYPES,
> >  };
> > diff --git a/flow_table.h b/flow_table.h
> > index eecf8844..b7e5529a 100644
> > --- a/flow_table.h
> > +++ b/flow_table.h
> > @@ -8,6 +8,7 @@
> >  #define FLOW_TABLE_H
> >  
> >  #include "tcp_conn.h"
> > +#include "icmp_flow.h"
> >  
> >  /**
> >   * struct flow_free_cluster - Information about a cluster of free entries
> > @@ -33,6 +34,7 @@ union flow {
> >  	struct flow_free_cluster free;
> >  	struct tcp_tap_conn tcp;
> >  	struct tcp_splice_conn tcp_splice;
> > +	struct icmp_ping_flow ping;
> >  };
> >  
> >  /* Global Flow Table */
> > diff --git a/icmp.c b/icmp.c
> > index fb2fcafc..1caf791d 100644
> > --- a/icmp.c
> > +++ b/icmp.c
> > @@ -39,24 +39,17 @@
> >  #include "siphash.h"
> >  #include "inany.h"
> >  #include "icmp.h"
> > +#include "flow_table.h"
> >  
> >  #define ICMP_ECHO_TIMEOUT	60 /* s, timeout for ICMP socket activity */
> >  #define ICMP_NUM_IDS		(1U << 16)
> >  
> > -/**
> > - * struct icmp_id_sock - Tracking information for single ICMP echo identifier
> > - * @sock:	Bound socket for identifier
> > - * @seq:	Last sequence number sent to tap, host order, -1: not sent yet
> > - * @ts:		Last associated activity from tap, seconds
> > - */
> > -struct icmp_id_sock {
> > -	int sock;
> > -	int seq;
> > -	time_t ts;
> > -};
> > +/* Sides of a flow as we use them for ping streams */
> > +#define	SOCKSIDE	0
> > +#define	TAPSIDE		1
> >  
> >  /* Indexed by ICMP echo identifier */
> > -static struct icmp_id_sock icmp_id_map[IP_VERSIONS][ICMP_NUM_IDS];
> > +static struct icmp_ping_flow *icmp_id_map[IP_VERSIONS][ICMP_NUM_IDS];
> >  
> >  /**
> >   * icmp_sock_handler() - Handle new data from ICMP or ICMPv6 socket
> > @@ -66,8 +59,8 @@ static struct icmp_id_sock icmp_id_map[IP_VERSIONS][ICMP_NUM_IDS];
> >   */
> >  void icmp_sock_handler(const struct ctx *c, sa_family_t af, union epoll_ref ref)
> >  {
> > -	struct icmp_id_sock *const id_sock = af == AF_INET
> > -		? &icmp_id_map[V4][ref.icmp.id] : &icmp_id_map[V6][ref.icmp.id];
> > +	struct icmp_ping_flow *pingf = af == AF_INET
> > +		? icmp_id_map[V4][ref.icmp.id] : icmp_id_map[V6][ref.icmp.id];
> >  	const char *const pname = af == AF_INET ? "ICMP" : "ICMPv6";
> >  	union sockaddr_inany sr;
> >  	socklen_t sl = sizeof(sr);
> > @@ -78,6 +71,8 @@ void icmp_sock_handler(const struct ctx *c, sa_family_t af, union epoll_ref ref)
> >  	if (c->no_icmp)
> >  		return;
> >  
> > +	ASSERT(pingf);
> > +
> >  	n = recvfrom(ref.fd, buf, sizeof(buf), 0, &sr.sa, &sl);
> >  	if (n < 0) {
> >  		warn("%s: recvfrom() error on ping socket: %s",
> > @@ -112,10 +107,10 @@ void icmp_sock_handler(const struct ctx *c, sa_family_t af, union epoll_ref ref)
> >  
> >  	/* In PASTA mode, we'll get any reply we send, discard them. */
> >  	if (c->mode == MODE_PASTA) {
> > -		if (id_sock->seq == seq)
> > +		if (pingf->seq == seq)
> >  			return;
> >  
> > -		id_sock->seq = seq;
> > +		pingf->seq = seq;
> >  	}
> >  
> >  	debug("%s: echo reply to tap, ID: %"PRIu16", seq: %"PRIu16, pname,
> > @@ -132,16 +127,22 @@ unexpected:
> >  }
> >  
> >  /**
> > - * icmp_ping_close() - Close and clean up a ping socket
> > + * icmp_ping_close() - Close and clean up a ping flow
> >   * @c:		Execution context
> > - * @id_sock:	Socket number and other info
> > + * @pingf:	ping flow entry to close
> >   */
> > -static void icmp_ping_close(const struct ctx *c, struct icmp_id_sock *id_sock)
> > +static void icmp_ping_close(const struct ctx *c,
> > +			    const struct icmp_ping_flow *pingf)
> >  {
> > -	epoll_ctl(c->epollfd, EPOLL_CTL_DEL, id_sock->sock, NULL);
> > -	close(id_sock->sock);
> > -	id_sock->sock = -1;
> > -	id_sock->seq = -1;
> > +	uint16_t id = pingf->id;
> > +
> > +	epoll_ctl(c->epollfd, EPOLL_CTL_DEL, pingf->sock, NULL);
> > +	close(pingf->sock);
> > +
> > +	if (pingf->f.type == FLOW_PING4)
> > +		icmp_id_map[V4][id] = NULL;
> > +	else
> > +		icmp_id_map[V6][id] = NULL;
> >  }
> >  
> >  /**
> > @@ -151,17 +152,27 @@ static void icmp_ping_close(const struct ctx *c, struct icmp_id_sock *id_sock)
> >   * @af:		Address family, AF_INET or AF_INET6
> >   * @id:		ICMP id for the new socket
> >   *
> > - * Return: Newly opened ping socket fd, or -1 on failure
> > + * Return: Newly opened ping flow, or NULL on failure
> >   */
> > -static int icmp_ping_new(const struct ctx *c, struct icmp_id_sock *id_sock,
> > -			 sa_family_t af, uint16_t id)
> > +static struct icmp_ping_flow *icmp_ping_new(const struct ctx *c,
> > +					    struct icmp_ping_flow **id_sock,
> 
> I'm not quite sure why we still need id_sock passed as parameter, and
> what it's supposed to contain (you haven't updated the function
> comment).

Oops.

> Now that all the information is encapsulated in the flow, and you
> return the new flow, with a trivial change in icmp_tap_handler(),
> couldn't we just drop id_sock here?

No, because this is only the "partial" flow table implementation,
lacking the address information in the common part of the flow.
Without that, while the information on a single flow is in the flow
table, we still need the icmp_id_map[] arrays to *find* the relevant
flow.  id_sock passed here is the relevant "slot" in those arrays, so
we can update them to index the new flow (that's why it's a (ping_flow
**) not a (ping_flow *)).

The "full" flow implementation removes those tables, and this
parameter.

-- 
David Gibson			| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you.  NOT _the_ _other_
				| _way_ _around_!
http://www.ozlabs.org/~dgibson

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH 1/3] icmp: Store ping socket information in flow table
  2024-03-07 10:24     ` David Gibson
@ 2024-03-07 23:25       ` Stefano Brivio
  2024-03-08  0:48         ` David Gibson
  0 siblings, 1 reply; 12+ messages in thread
From: Stefano Brivio @ 2024-03-07 23:25 UTC (permalink / raw)
  To: David Gibson; +Cc: passt-dev

On Thu, 7 Mar 2024 21:24:28 +1100
David Gibson <david@gibson.dropbear.id.au> wrote:

> On Thu, Mar 07, 2024 at 09:53:15AM +0100, Stefano Brivio wrote:
> > Apologies for the delay, I'm still reviewing 3/3, but I have a question
> > meanwhile:
> > 
> > On Thu, 29 Feb 2024 15:15:32 +1100
> > David Gibson <david@gibson.dropbear.id.au> wrote:
> >   
> > > Currently icmp_id_map[][] stores information about ping sockets in a
> > > bespoke structure.  Move the same information into new types of flow
> > > in the flow table.  To match that change, replace the existing ICMP
> > > timer with a flow-based timer for expiring ping sockets.  This has the
> > > advantage that we only need to scan the active flows, not all possible
> > > ids.
> > > 
> > > We convert icmp_id_map[][] to point to the flow table entries, rather
> > > than containing its own information.  We do still use that array for
> > > locating the right ping flows, rather than using a "flow native" form
> > > of lookup for the time being.
> > > 
> > > Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
> > > ---
> > >  Makefile     |   6 +--
> > >  flow.c       |   9 ++++
> > >  flow.h       |   4 ++
> > >  flow_table.h |   2 +
> > >  icmp.c       | 143 +++++++++++++++++++++++----------------------------
> > >  icmp.h       |   2 +-
> > >  icmp_flow.h  |  31 +++++++++++
> > >  passt.c      |   5 --
> > >  8 files changed, 115 insertions(+), 87 deletions(-)
> > >  create mode 100644 icmp_flow.h
> > > 
> > > diff --git a/Makefile b/Makefile
> > > index 2d6a5155..47fc5448 100644
> > > --- a/Makefile
> > > +++ b/Makefile
> > > @@ -54,9 +54,9 @@ SRCS = $(PASST_SRCS) $(QRAP_SRCS)
> > >  MANPAGES = passt.1 pasta.1 qrap.1
> > >  
> > >  PASST_HEADERS = arch.h arp.h checksum.h conf.h dhcp.h dhcpv6.h flow.h fwd.h \
> > > -	flow_table.h icmp.h inany.h iov.h isolation.h lineread.h log.h ndp.h \
> > > -	netlink.h packet.h passt.h pasta.h pcap.h pif.h siphash.h tap.h tcp.h \
> > > -	tcp_conn.h tcp_splice.h udp.h util.h
> > > +	flow_table.h icmp.h icmp_flow.h inany.h iov.h isolation.h lineread.h \
> > > +	log.h ndp.h netlink.h packet.h passt.h pasta.h pcap.h pif.h siphash.h \
> > > +	tap.h tcp.h tcp_conn.h tcp_splice.h udp.h util.h
> > >  HEADERS = $(PASST_HEADERS) seccomp.h
> > >  
> > >  C := \#include <linux/tcp.h>\nstruct tcp_info x = { .tcpi_snd_wnd = 0 };
> > > diff --git a/flow.c b/flow.c
> > > index d7974d59..5835d6c0 100644
> > > --- a/flow.c
> > > +++ b/flow.c
> > > @@ -21,6 +21,8 @@ const char *flow_type_str[] = {
> > >  	[FLOW_TYPE_NONE]	= "<none>",
> > >  	[FLOW_TCP]		= "TCP connection",
> > >  	[FLOW_TCP_SPLICE]	= "TCP connection (spliced)",
> > > +	[FLOW_PING4]		= "ICMP ping sequence",
> > > +	[FLOW_PING6]		= "ICMPv6 ping sequence",
> > >  };
> > >  static_assert(ARRAY_SIZE(flow_type_str) == FLOW_NUM_TYPES,
> > >  	      "flow_type_str[] doesn't match enum flow_type");
> > > @@ -28,6 +30,8 @@ static_assert(ARRAY_SIZE(flow_type_str) == FLOW_NUM_TYPES,
> > >  const uint8_t flow_proto[] = {
> > >  	[FLOW_TCP]		= IPPROTO_TCP,
> > >  	[FLOW_TCP_SPLICE]	= IPPROTO_TCP,
> > > +	[FLOW_PING4]		= IPPROTO_ICMP,
> > > +	[FLOW_PING6]		= IPPROTO_ICMPV6,
> > >  };
> > >  static_assert(ARRAY_SIZE(flow_proto) == FLOW_NUM_TYPES,
> > >  	      "flow_proto[] doesn't match enum flow_type");
> > > @@ -294,6 +298,11 @@ void flow_defer_handler(const struct ctx *c, const struct timespec *now)
> > >  			if (!closed && timer)
> > >  				tcp_splice_timer(c, flow);
> > >  			break;
> > > +		case FLOW_PING4:
> > > +		case FLOW_PING6:
> > > +			if (timer)
> > > +				closed = icmp_ping_timer(c, flow, now);
> > > +			break;
> > >  		default:
> > >  			/* Assume other flow types don't need any handling */
> > >  			;
> > > diff --git a/flow.h b/flow.h
> > > index 8b66751b..c943c441 100644
> > > --- a/flow.h
> > > +++ b/flow.h
> > > @@ -19,6 +19,10 @@ enum flow_type {
> > >  	FLOW_TCP,
> > >  	/* A TCP connection between a host socket and ns socket */
> > >  	FLOW_TCP_SPLICE,
> > > +	/* ICMP echo requests from guest to host and matching replies back */
> > > +	FLOW_PING4,
> > > +	/* ICMPv6 echo requests from guest to host and matching replies back */
> > > +	FLOW_PING6,
> > >  
> > >  	FLOW_NUM_TYPES,
> > >  };
> > > diff --git a/flow_table.h b/flow_table.h
> > > index eecf8844..b7e5529a 100644
> > > --- a/flow_table.h
> > > +++ b/flow_table.h
> > > @@ -8,6 +8,7 @@
> > >  #define FLOW_TABLE_H
> > >  
> > >  #include "tcp_conn.h"
> > > +#include "icmp_flow.h"
> > >  
> > >  /**
> > >   * struct flow_free_cluster - Information about a cluster of free entries
> > > @@ -33,6 +34,7 @@ union flow {
> > >  	struct flow_free_cluster free;
> > >  	struct tcp_tap_conn tcp;
> > >  	struct tcp_splice_conn tcp_splice;
> > > +	struct icmp_ping_flow ping;
> > >  };
> > >  
> > >  /* Global Flow Table */
> > > diff --git a/icmp.c b/icmp.c
> > > index fb2fcafc..1caf791d 100644
> > > --- a/icmp.c
> > > +++ b/icmp.c
> > > @@ -39,24 +39,17 @@
> > >  #include "siphash.h"
> > >  #include "inany.h"
> > >  #include "icmp.h"
> > > +#include "flow_table.h"
> > >  
> > >  #define ICMP_ECHO_TIMEOUT	60 /* s, timeout for ICMP socket activity */
> > >  #define ICMP_NUM_IDS		(1U << 16)
> > >  
> > > -/**
> > > - * struct icmp_id_sock - Tracking information for single ICMP echo identifier
> > > - * @sock:	Bound socket for identifier
> > > - * @seq:	Last sequence number sent to tap, host order, -1: not sent yet
> > > - * @ts:		Last associated activity from tap, seconds
> > > - */
> > > -struct icmp_id_sock {
> > > -	int sock;
> > > -	int seq;
> > > -	time_t ts;
> > > -};
> > > +/* Sides of a flow as we use them for ping streams */
> > > +#define	SOCKSIDE	0
> > > +#define	TAPSIDE		1
> > >  
> > >  /* Indexed by ICMP echo identifier */
> > > -static struct icmp_id_sock icmp_id_map[IP_VERSIONS][ICMP_NUM_IDS];
> > > +static struct icmp_ping_flow *icmp_id_map[IP_VERSIONS][ICMP_NUM_IDS];
> > >  
> > >  /**
> > >   * icmp_sock_handler() - Handle new data from ICMP or ICMPv6 socket
> > > @@ -66,8 +59,8 @@ static struct icmp_id_sock icmp_id_map[IP_VERSIONS][ICMP_NUM_IDS];
> > >   */
> > >  void icmp_sock_handler(const struct ctx *c, sa_family_t af, union epoll_ref ref)
> > >  {
> > > -	struct icmp_id_sock *const id_sock = af == AF_INET
> > > -		? &icmp_id_map[V4][ref.icmp.id] : &icmp_id_map[V6][ref.icmp.id];
> > > +	struct icmp_ping_flow *pingf = af == AF_INET
> > > +		? icmp_id_map[V4][ref.icmp.id] : icmp_id_map[V6][ref.icmp.id];
> > >  	const char *const pname = af == AF_INET ? "ICMP" : "ICMPv6";
> > >  	union sockaddr_inany sr;
> > >  	socklen_t sl = sizeof(sr);
> > > @@ -78,6 +71,8 @@ void icmp_sock_handler(const struct ctx *c, sa_family_t af, union epoll_ref ref)
> > >  	if (c->no_icmp)
> > >  		return;
> > >  
> > > +	ASSERT(pingf);
> > > +
> > >  	n = recvfrom(ref.fd, buf, sizeof(buf), 0, &sr.sa, &sl);
> > >  	if (n < 0) {
> > >  		warn("%s: recvfrom() error on ping socket: %s",
> > > @@ -112,10 +107,10 @@ void icmp_sock_handler(const struct ctx *c, sa_family_t af, union epoll_ref ref)
> > >  
> > >  	/* In PASTA mode, we'll get any reply we send, discard them. */
> > >  	if (c->mode == MODE_PASTA) {
> > > -		if (id_sock->seq == seq)
> > > +		if (pingf->seq == seq)
> > >  			return;
> > >  
> > > -		id_sock->seq = seq;
> > > +		pingf->seq = seq;
> > >  	}
> > >  
> > >  	debug("%s: echo reply to tap, ID: %"PRIu16", seq: %"PRIu16, pname,
> > > @@ -132,16 +127,22 @@ unexpected:
> > >  }
> > >  
> > >  /**
> > > - * icmp_ping_close() - Close and clean up a ping socket
> > > + * icmp_ping_close() - Close and clean up a ping flow
> > >   * @c:		Execution context
> > > - * @id_sock:	Socket number and other info
> > > + * @pingf:	ping flow entry to close
> > >   */
> > > -static void icmp_ping_close(const struct ctx *c, struct icmp_id_sock *id_sock)
> > > +static void icmp_ping_close(const struct ctx *c,
> > > +			    const struct icmp_ping_flow *pingf)
> > >  {
> > > -	epoll_ctl(c->epollfd, EPOLL_CTL_DEL, id_sock->sock, NULL);
> > > -	close(id_sock->sock);
> > > -	id_sock->sock = -1;
> > > -	id_sock->seq = -1;
> > > +	uint16_t id = pingf->id;
> > > +
> > > +	epoll_ctl(c->epollfd, EPOLL_CTL_DEL, pingf->sock, NULL);
> > > +	close(pingf->sock);
> > > +
> > > +	if (pingf->f.type == FLOW_PING4)
> > > +		icmp_id_map[V4][id] = NULL;
> > > +	else
> > > +		icmp_id_map[V6][id] = NULL;
> > >  }
> > >  
> > >  /**
> > > @@ -151,17 +152,27 @@ static void icmp_ping_close(const struct ctx *c, struct icmp_id_sock *id_sock)
> > >   * @af:		Address family, AF_INET or AF_INET6
> > >   * @id:		ICMP id for the new socket
> > >   *
> > > - * Return: Newly opened ping socket fd, or -1 on failure
> > > + * Return: Newly opened ping flow, or NULL on failure
> > >   */
> > > -static int icmp_ping_new(const struct ctx *c, struct icmp_id_sock *id_sock,
> > > -			 sa_family_t af, uint16_t id)
> > > +static struct icmp_ping_flow *icmp_ping_new(const struct ctx *c,
> > > +					    struct icmp_ping_flow **id_sock,  
> > 
> > I'm not quite sure why we still need id_sock passed as parameter, and
> > what it's supposed to contain (you haven't updated the function
> > comment).  
> 
> Oops.

I can change it according to your comment below:

 * @id_sock:	Pointer to ping flow entry slot in icmp_id_map[] to update

?

> > Now that all the information is encapsulated in the flow, and you
> > return the new flow, with a trivial change in icmp_tap_handler(),
> > couldn't we just drop id_sock here?  
> 
> No, because this is only the "partial" flow table implementation,
> lacking the address information in the common part of the flow.
> Without that, while the information on a single flow is in the flow
> table, we still need the icmp_id_map[] arrays to *find* the relevant
> flow.  id_sock passed here is the relevant "slot" in those arrays, so
> we can update them to index the new flow (that's why it's a (ping_flow
> **) not a (ping_flow *)).

Ah, right, of course, I thought the caller could/should take care of
updating the slot, too, but now I understand the commit message, thanks.

-- 
Stefano


^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH 2/3] icmp: Flow based error reporting
  2024-02-29  4:15 ` [PATCH 2/3] icmp: Flow based error reporting David Gibson
@ 2024-03-07 23:26   ` Stefano Brivio
  2024-03-08  0:49     ` David Gibson
  0 siblings, 1 reply; 12+ messages in thread
From: Stefano Brivio @ 2024-03-07 23:26 UTC (permalink / raw)
  To: David Gibson; +Cc: passt-dev

On Thu, 29 Feb 2024 15:15:33 +1100
David Gibson <david@gibson.dropbear.id.au> wrote:

> Use flow_dbg() and flow_err() helpers to generate flow-linked error
> messages in most places.  Make a few small improvements to the messages
> while we're at it.  This allows us to avoid the awkward 'pname' variables
> since whether we're dealing with ICMP or ICMPv6 is already built into the
> flow type which these helpers include.
> 
> Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
> ---
>  icmp.c | 31 ++++++++++++++-----------------
>  1 file changed, 14 insertions(+), 17 deletions(-)
> 
> diff --git a/icmp.c b/icmp.c
> index 1caf791d..63adafcf 100644
> --- a/icmp.c
> +++ b/icmp.c
> @@ -61,7 +61,6 @@ void icmp_sock_handler(const struct ctx *c, sa_family_t af, union epoll_ref ref)
>  {
>  	struct icmp_ping_flow *pingf = af == AF_INET
>  		? icmp_id_map[V4][ref.icmp.id] : icmp_id_map[V6][ref.icmp.id];
> -	const char *const pname = af == AF_INET ? "ICMP" : "ICMPv6";
>  	union sockaddr_inany sr;
>  	socklen_t sl = sizeof(sr);
>  	char buf[USHRT_MAX];
> @@ -75,8 +74,7 @@ void icmp_sock_handler(const struct ctx *c, sa_family_t af, union epoll_ref ref)
>  
>  	n = recvfrom(ref.fd, buf, sizeof(buf), 0, &sr.sa, &sl);
>  	if (n < 0) {
> -		warn("%s: recvfrom() error on ping socket: %s",
> -		     pname, strerror(errno));
> +		flow_err(pingf, "recvfrom() error: %s", strerror(errno));
>  		return;
>  	}
>  	if (sr.sa_family != af)
> @@ -113,8 +111,9 @@ void icmp_sock_handler(const struct ctx *c, sa_family_t af, union epoll_ref ref)
>  		pingf->seq = seq;
>  	}
>  
> -	debug("%s: echo reply to tap, ID: %"PRIu16", seq: %"PRIu16, pname,
> -	      ref.icmp.id, seq);
> +	flow_dbg(pingf, "echo reply to tap, ID: %"PRIu16", seq: %"PRIu16,
> +		 ref.icmp.id, seq);
> +
>  	if (af == AF_INET)
>  		tap_icmp4_send(c, sr.sa4.sin_addr, tap_ip4_daddr(c), buf, n);
>  	else if (af == AF_INET6)
> @@ -123,7 +122,7 @@ void icmp_sock_handler(const struct ctx *c, sa_family_t af, union epoll_ref ref)
>  	return;
>  
>  unexpected:
> -	warn("%s: Unexpected packet on ping socket", pname);
> +	flow_err(pingf, "Unexpected packet on ping socket");
>  }
>  
>  /**
> @@ -158,7 +157,6 @@ static struct icmp_ping_flow *icmp_ping_new(const struct ctx *c,
>  					    struct icmp_ping_flow **id_sock,
>  					    sa_family_t af, uint16_t id)
>  {
> -	const char *const pname = af == AF_INET ? "ICMP" : "ICMPv6";
>  	uint8_t flowtype = af == AF_INET ? FLOW_PING4 : FLOW_PING6;
>  	union icmp_epoll_ref iref = { .id = id };
>  	union flow *flow = flow_alloc();
> @@ -195,9 +193,9 @@ static struct icmp_ping_flow *icmp_ping_new(const struct ctx *c,
>  	if (pingf->sock > FD_REF_MAX)
>  		goto cancel;
>  
> -	*id_sock = pingf;
> +	flow_dbg(pingf, "new socket %i for echo ID %"PRIu16, pingf->sock, id);
>  
> -	debug("%s: new socket %i for echo ID %"PRIu16, pname, pingf->sock, id);
> +	*id_sock = pingf;
>  
>  	return pingf;
>  
> @@ -222,7 +220,6 @@ int icmp_tap_handler(const struct ctx *c, uint8_t pif, sa_family_t af,
>  		     const void *saddr, const void *daddr,
>  		     const struct pool *p, const struct timespec *now)
>  {
> -	const char *const pname = af == AF_INET ? "ICMP" : "ICMPv6";
>  	union sockaddr_inany sa = { .sa_family = af };
>  	const socklen_t sl = af == AF_INET ? sizeof(sa.sa4) : sizeof(sa.sa6);
>  	struct icmp_ping_flow *pingf, **id_sock;
> @@ -276,13 +273,13 @@ int icmp_tap_handler(const struct ctx *c, uint8_t pif, sa_family_t af,
>  
>  	pingf->ts = now->tv_sec;
>  
> -	if (sendto(pingf->sock, pkt, plen, MSG_NOSIGNAL, &sa.sa, sl) < 0) {
> -		debug("%s: failed to relay request to socket: %s",
> -		      pname, strerror(errno));
> -	} else {
> -		debug("%s: echo request to socket, ID: %"PRIu16", seq: %"PRIu16,
> -		      pname, id, seq);
> -	}
> +	if (sendto(pingf->sock, pkt, plen, MSG_NOSIGNAL, &sa.sa, sl) < 0)
> +		flow_dbg(pingf, "failed to relay request to socket: %s",
> +			 strerror(errno));
> +	else
> +		flow_dbg(pingf,
> +			 "echo request to socket, ID: %"PRIu16", seq: %"PRIu16,
> +			 id, seq);

I can add the usual curly brackets on merge. The rest of the series
looks good to me.

-- 
Stefano


^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH 1/3] icmp: Store ping socket information in flow table
  2024-03-07 23:25       ` Stefano Brivio
@ 2024-03-08  0:48         ` David Gibson
  0 siblings, 0 replies; 12+ messages in thread
From: David Gibson @ 2024-03-08  0:48 UTC (permalink / raw)
  To: Stefano Brivio; +Cc: passt-dev

[-- Attachment #1: Type: text/plain, Size: 11061 bytes --]

On Fri, Mar 08, 2024 at 12:25:44AM +0100, Stefano Brivio wrote:
> On Thu, 7 Mar 2024 21:24:28 +1100
> David Gibson <david@gibson.dropbear.id.au> wrote:
> 
> > On Thu, Mar 07, 2024 at 09:53:15AM +0100, Stefano Brivio wrote:
> > > Apologies for the delay, I'm still reviewing 3/3, but I have a question
> > > meanwhile:
> > > 
> > > On Thu, 29 Feb 2024 15:15:32 +1100
> > > David Gibson <david@gibson.dropbear.id.au> wrote:
> > >   
> > > > Currently icmp_id_map[][] stores information about ping sockets in a
> > > > bespoke structure.  Move the same information into new types of flow
> > > > in the flow table.  To match that change, replace the existing ICMP
> > > > timer with a flow-based timer for expiring ping sockets.  This has the
> > > > advantage that we only need to scan the active flows, not all possible
> > > > ids.
> > > > 
> > > > We convert icmp_id_map[][] to point to the flow table entries, rather
> > > > than containing its own information.  We do still use that array for
> > > > locating the right ping flows, rather than using a "flow native" form
> > > > of lookup for the time being.
> > > > 
> > > > Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
> > > > ---
> > > >  Makefile     |   6 +--
> > > >  flow.c       |   9 ++++
> > > >  flow.h       |   4 ++
> > > >  flow_table.h |   2 +
> > > >  icmp.c       | 143 +++++++++++++++++++++++----------------------------
> > > >  icmp.h       |   2 +-
> > > >  icmp_flow.h  |  31 +++++++++++
> > > >  passt.c      |   5 --
> > > >  8 files changed, 115 insertions(+), 87 deletions(-)
> > > >  create mode 100644 icmp_flow.h
> > > > 
> > > > diff --git a/Makefile b/Makefile
> > > > index 2d6a5155..47fc5448 100644
> > > > --- a/Makefile
> > > > +++ b/Makefile
> > > > @@ -54,9 +54,9 @@ SRCS = $(PASST_SRCS) $(QRAP_SRCS)
> > > >  MANPAGES = passt.1 pasta.1 qrap.1
> > > >  
> > > >  PASST_HEADERS = arch.h arp.h checksum.h conf.h dhcp.h dhcpv6.h flow.h fwd.h \
> > > > -	flow_table.h icmp.h inany.h iov.h isolation.h lineread.h log.h ndp.h \
> > > > -	netlink.h packet.h passt.h pasta.h pcap.h pif.h siphash.h tap.h tcp.h \
> > > > -	tcp_conn.h tcp_splice.h udp.h util.h
> > > > +	flow_table.h icmp.h icmp_flow.h inany.h iov.h isolation.h lineread.h \
> > > > +	log.h ndp.h netlink.h packet.h passt.h pasta.h pcap.h pif.h siphash.h \
> > > > +	tap.h tcp.h tcp_conn.h tcp_splice.h udp.h util.h
> > > >  HEADERS = $(PASST_HEADERS) seccomp.h
> > > >  
> > > >  C := \#include <linux/tcp.h>\nstruct tcp_info x = { .tcpi_snd_wnd = 0 };
> > > > diff --git a/flow.c b/flow.c
> > > > index d7974d59..5835d6c0 100644
> > > > --- a/flow.c
> > > > +++ b/flow.c
> > > > @@ -21,6 +21,8 @@ const char *flow_type_str[] = {
> > > >  	[FLOW_TYPE_NONE]	= "<none>",
> > > >  	[FLOW_TCP]		= "TCP connection",
> > > >  	[FLOW_TCP_SPLICE]	= "TCP connection (spliced)",
> > > > +	[FLOW_PING4]		= "ICMP ping sequence",
> > > > +	[FLOW_PING6]		= "ICMPv6 ping sequence",
> > > >  };
> > > >  static_assert(ARRAY_SIZE(flow_type_str) == FLOW_NUM_TYPES,
> > > >  	      "flow_type_str[] doesn't match enum flow_type");
> > > > @@ -28,6 +30,8 @@ static_assert(ARRAY_SIZE(flow_type_str) == FLOW_NUM_TYPES,
> > > >  const uint8_t flow_proto[] = {
> > > >  	[FLOW_TCP]		= IPPROTO_TCP,
> > > >  	[FLOW_TCP_SPLICE]	= IPPROTO_TCP,
> > > > +	[FLOW_PING4]		= IPPROTO_ICMP,
> > > > +	[FLOW_PING6]		= IPPROTO_ICMPV6,
> > > >  };
> > > >  static_assert(ARRAY_SIZE(flow_proto) == FLOW_NUM_TYPES,
> > > >  	      "flow_proto[] doesn't match enum flow_type");
> > > > @@ -294,6 +298,11 @@ void flow_defer_handler(const struct ctx *c, const struct timespec *now)
> > > >  			if (!closed && timer)
> > > >  				tcp_splice_timer(c, flow);
> > > >  			break;
> > > > +		case FLOW_PING4:
> > > > +		case FLOW_PING6:
> > > > +			if (timer)
> > > > +				closed = icmp_ping_timer(c, flow, now);
> > > > +			break;
> > > >  		default:
> > > >  			/* Assume other flow types don't need any handling */
> > > >  			;
> > > > diff --git a/flow.h b/flow.h
> > > > index 8b66751b..c943c441 100644
> > > > --- a/flow.h
> > > > +++ b/flow.h
> > > > @@ -19,6 +19,10 @@ enum flow_type {
> > > >  	FLOW_TCP,
> > > >  	/* A TCP connection between a host socket and ns socket */
> > > >  	FLOW_TCP_SPLICE,
> > > > +	/* ICMP echo requests from guest to host and matching replies back */
> > > > +	FLOW_PING4,
> > > > +	/* ICMPv6 echo requests from guest to host and matching replies back */
> > > > +	FLOW_PING6,
> > > >  
> > > >  	FLOW_NUM_TYPES,
> > > >  };
> > > > diff --git a/flow_table.h b/flow_table.h
> > > > index eecf8844..b7e5529a 100644
> > > > --- a/flow_table.h
> > > > +++ b/flow_table.h
> > > > @@ -8,6 +8,7 @@
> > > >  #define FLOW_TABLE_H
> > > >  
> > > >  #include "tcp_conn.h"
> > > > +#include "icmp_flow.h"
> > > >  
> > > >  /**
> > > >   * struct flow_free_cluster - Information about a cluster of free entries
> > > > @@ -33,6 +34,7 @@ union flow {
> > > >  	struct flow_free_cluster free;
> > > >  	struct tcp_tap_conn tcp;
> > > >  	struct tcp_splice_conn tcp_splice;
> > > > +	struct icmp_ping_flow ping;
> > > >  };
> > > >  
> > > >  /* Global Flow Table */
> > > > diff --git a/icmp.c b/icmp.c
> > > > index fb2fcafc..1caf791d 100644
> > > > --- a/icmp.c
> > > > +++ b/icmp.c
> > > > @@ -39,24 +39,17 @@
> > > >  #include "siphash.h"
> > > >  #include "inany.h"
> > > >  #include "icmp.h"
> > > > +#include "flow_table.h"
> > > >  
> > > >  #define ICMP_ECHO_TIMEOUT	60 /* s, timeout for ICMP socket activity */
> > > >  #define ICMP_NUM_IDS		(1U << 16)
> > > >  
> > > > -/**
> > > > - * struct icmp_id_sock - Tracking information for single ICMP echo identifier
> > > > - * @sock:	Bound socket for identifier
> > > > - * @seq:	Last sequence number sent to tap, host order, -1: not sent yet
> > > > - * @ts:		Last associated activity from tap, seconds
> > > > - */
> > > > -struct icmp_id_sock {
> > > > -	int sock;
> > > > -	int seq;
> > > > -	time_t ts;
> > > > -};
> > > > +/* Sides of a flow as we use them for ping streams */
> > > > +#define	SOCKSIDE	0
> > > > +#define	TAPSIDE		1
> > > >  
> > > >  /* Indexed by ICMP echo identifier */
> > > > -static struct icmp_id_sock icmp_id_map[IP_VERSIONS][ICMP_NUM_IDS];
> > > > +static struct icmp_ping_flow *icmp_id_map[IP_VERSIONS][ICMP_NUM_IDS];
> > > >  
> > > >  /**
> > > >   * icmp_sock_handler() - Handle new data from ICMP or ICMPv6 socket
> > > > @@ -66,8 +59,8 @@ static struct icmp_id_sock icmp_id_map[IP_VERSIONS][ICMP_NUM_IDS];
> > > >   */
> > > >  void icmp_sock_handler(const struct ctx *c, sa_family_t af, union epoll_ref ref)
> > > >  {
> > > > -	struct icmp_id_sock *const id_sock = af == AF_INET
> > > > -		? &icmp_id_map[V4][ref.icmp.id] : &icmp_id_map[V6][ref.icmp.id];
> > > > +	struct icmp_ping_flow *pingf = af == AF_INET
> > > > +		? icmp_id_map[V4][ref.icmp.id] : icmp_id_map[V6][ref.icmp.id];
> > > >  	const char *const pname = af == AF_INET ? "ICMP" : "ICMPv6";
> > > >  	union sockaddr_inany sr;
> > > >  	socklen_t sl = sizeof(sr);
> > > > @@ -78,6 +71,8 @@ void icmp_sock_handler(const struct ctx *c, sa_family_t af, union epoll_ref ref)
> > > >  	if (c->no_icmp)
> > > >  		return;
> > > >  
> > > > +	ASSERT(pingf);
> > > > +
> > > >  	n = recvfrom(ref.fd, buf, sizeof(buf), 0, &sr.sa, &sl);
> > > >  	if (n < 0) {
> > > >  		warn("%s: recvfrom() error on ping socket: %s",
> > > > @@ -112,10 +107,10 @@ void icmp_sock_handler(const struct ctx *c, sa_family_t af, union epoll_ref ref)
> > > >  
> > > >  	/* In PASTA mode, we'll get any reply we send, discard them. */
> > > >  	if (c->mode == MODE_PASTA) {
> > > > -		if (id_sock->seq == seq)
> > > > +		if (pingf->seq == seq)
> > > >  			return;
> > > >  
> > > > -		id_sock->seq = seq;
> > > > +		pingf->seq = seq;
> > > >  	}
> > > >  
> > > >  	debug("%s: echo reply to tap, ID: %"PRIu16", seq: %"PRIu16, pname,
> > > > @@ -132,16 +127,22 @@ unexpected:
> > > >  }
> > > >  
> > > >  /**
> > > > - * icmp_ping_close() - Close and clean up a ping socket
> > > > + * icmp_ping_close() - Close and clean up a ping flow
> > > >   * @c:		Execution context
> > > > - * @id_sock:	Socket number and other info
> > > > + * @pingf:	ping flow entry to close
> > > >   */
> > > > -static void icmp_ping_close(const struct ctx *c, struct icmp_id_sock *id_sock)
> > > > +static void icmp_ping_close(const struct ctx *c,
> > > > +			    const struct icmp_ping_flow *pingf)
> > > >  {
> > > > -	epoll_ctl(c->epollfd, EPOLL_CTL_DEL, id_sock->sock, NULL);
> > > > -	close(id_sock->sock);
> > > > -	id_sock->sock = -1;
> > > > -	id_sock->seq = -1;
> > > > +	uint16_t id = pingf->id;
> > > > +
> > > > +	epoll_ctl(c->epollfd, EPOLL_CTL_DEL, pingf->sock, NULL);
> > > > +	close(pingf->sock);
> > > > +
> > > > +	if (pingf->f.type == FLOW_PING4)
> > > > +		icmp_id_map[V4][id] = NULL;
> > > > +	else
> > > > +		icmp_id_map[V6][id] = NULL;
> > > >  }
> > > >  
> > > >  /**
> > > > @@ -151,17 +152,27 @@ static void icmp_ping_close(const struct ctx *c, struct icmp_id_sock *id_sock)
> > > >   * @af:		Address family, AF_INET or AF_INET6
> > > >   * @id:		ICMP id for the new socket
> > > >   *
> > > > - * Return: Newly opened ping socket fd, or -1 on failure
> > > > + * Return: Newly opened ping flow, or NULL on failure
> > > >   */
> > > > -static int icmp_ping_new(const struct ctx *c, struct icmp_id_sock *id_sock,
> > > > -			 sa_family_t af, uint16_t id)
> > > > +static struct icmp_ping_flow *icmp_ping_new(const struct ctx *c,
> > > > +					    struct icmp_ping_flow **id_sock,  
> > > 
> > > I'm not quite sure why we still need id_sock passed as parameter, and
> > > what it's supposed to contain (you haven't updated the function
> > > comment).  
> > 
> > Oops.
> 
> I can change it according to your comment below:
> 
>  * @id_sock:	Pointer to ping flow entry slot in icmp_id_map[] to update
> 
> ?

Sounds good.

> > > Now that all the information is encapsulated in the flow, and you
> > > return the new flow, with a trivial change in icmp_tap_handler(),
> > > couldn't we just drop id_sock here?  
> > 
> > No, because this is only the "partial" flow table implementation,
> > lacking the address information in the common part of the flow.
> > Without that, while the information on a single flow is in the flow
> > table, we still need the icmp_id_map[] arrays to *find* the relevant
> > flow.  id_sock passed here is the relevant "slot" in those arrays, so
> > we can update them to index the new flow (that's why it's a (ping_flow
> > **) not a (ping_flow *)).
> 
> Ah, right, of course, I thought the caller could/should take care of
> updating the slot, too, but now I understand the commit message, thanks.
> 

-- 
David Gibson			| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you.  NOT _the_ _other_
				| _way_ _around_!
http://www.ozlabs.org/~dgibson

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH 2/3] icmp: Flow based error reporting
  2024-03-07 23:26   ` Stefano Brivio
@ 2024-03-08  0:49     ` David Gibson
  2024-03-08  6:06       ` Stefano Brivio
  0 siblings, 1 reply; 12+ messages in thread
From: David Gibson @ 2024-03-08  0:49 UTC (permalink / raw)
  To: Stefano Brivio; +Cc: passt-dev

[-- Attachment #1: Type: text/plain, Size: 4797 bytes --]

On Fri, Mar 08, 2024 at 12:26:40AM +0100, Stefano Brivio wrote:
> On Thu, 29 Feb 2024 15:15:33 +1100
> David Gibson <david@gibson.dropbear.id.au> wrote:
> 
> > Use flow_dbg() and flow_err() helpers to generate flow-linked error
> > messages in most places.  Make a few small improvements to the messages
> > while we're at it.  This allows us to avoid the awkward 'pname' variables
> > since whether we're dealing with ICMP or ICMPv6 is already built into the
> > flow type which these helpers include.
> > 
> > Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
> > ---
> >  icmp.c | 31 ++++++++++++++-----------------
> >  1 file changed, 14 insertions(+), 17 deletions(-)
> > 
> > diff --git a/icmp.c b/icmp.c
> > index 1caf791d..63adafcf 100644
> > --- a/icmp.c
> > +++ b/icmp.c
> > @@ -61,7 +61,6 @@ void icmp_sock_handler(const struct ctx *c, sa_family_t af, union epoll_ref ref)
> >  {
> >  	struct icmp_ping_flow *pingf = af == AF_INET
> >  		? icmp_id_map[V4][ref.icmp.id] : icmp_id_map[V6][ref.icmp.id];
> > -	const char *const pname = af == AF_INET ? "ICMP" : "ICMPv6";
> >  	union sockaddr_inany sr;
> >  	socklen_t sl = sizeof(sr);
> >  	char buf[USHRT_MAX];
> > @@ -75,8 +74,7 @@ void icmp_sock_handler(const struct ctx *c, sa_family_t af, union epoll_ref ref)
> >  
> >  	n = recvfrom(ref.fd, buf, sizeof(buf), 0, &sr.sa, &sl);
> >  	if (n < 0) {
> > -		warn("%s: recvfrom() error on ping socket: %s",
> > -		     pname, strerror(errno));
> > +		flow_err(pingf, "recvfrom() error: %s", strerror(errno));
> >  		return;
> >  	}
> >  	if (sr.sa_family != af)
> > @@ -113,8 +111,9 @@ void icmp_sock_handler(const struct ctx *c, sa_family_t af, union epoll_ref ref)
> >  		pingf->seq = seq;
> >  	}
> >  
> > -	debug("%s: echo reply to tap, ID: %"PRIu16", seq: %"PRIu16, pname,
> > -	      ref.icmp.id, seq);
> > +	flow_dbg(pingf, "echo reply to tap, ID: %"PRIu16", seq: %"PRIu16,
> > +		 ref.icmp.id, seq);
> > +
> >  	if (af == AF_INET)
> >  		tap_icmp4_send(c, sr.sa4.sin_addr, tap_ip4_daddr(c), buf, n);
> >  	else if (af == AF_INET6)
> > @@ -123,7 +122,7 @@ void icmp_sock_handler(const struct ctx *c, sa_family_t af, union epoll_ref ref)
> >  	return;
> >  
> >  unexpected:
> > -	warn("%s: Unexpected packet on ping socket", pname);
> > +	flow_err(pingf, "Unexpected packet on ping socket");
> >  }
> >  
> >  /**
> > @@ -158,7 +157,6 @@ static struct icmp_ping_flow *icmp_ping_new(const struct ctx *c,
> >  					    struct icmp_ping_flow **id_sock,
> >  					    sa_family_t af, uint16_t id)
> >  {
> > -	const char *const pname = af == AF_INET ? "ICMP" : "ICMPv6";
> >  	uint8_t flowtype = af == AF_INET ? FLOW_PING4 : FLOW_PING6;
> >  	union icmp_epoll_ref iref = { .id = id };
> >  	union flow *flow = flow_alloc();
> > @@ -195,9 +193,9 @@ static struct icmp_ping_flow *icmp_ping_new(const struct ctx *c,
> >  	if (pingf->sock > FD_REF_MAX)
> >  		goto cancel;
> >  
> > -	*id_sock = pingf;
> > +	flow_dbg(pingf, "new socket %i for echo ID %"PRIu16, pingf->sock, id);
> >  
> > -	debug("%s: new socket %i for echo ID %"PRIu16, pname, pingf->sock, id);
> > +	*id_sock = pingf;
> >  
> >  	return pingf;
> >  
> > @@ -222,7 +220,6 @@ int icmp_tap_handler(const struct ctx *c, uint8_t pif, sa_family_t af,
> >  		     const void *saddr, const void *daddr,
> >  		     const struct pool *p, const struct timespec *now)
> >  {
> > -	const char *const pname = af == AF_INET ? "ICMP" : "ICMPv6";
> >  	union sockaddr_inany sa = { .sa_family = af };
> >  	const socklen_t sl = af == AF_INET ? sizeof(sa.sa4) : sizeof(sa.sa6);
> >  	struct icmp_ping_flow *pingf, **id_sock;
> > @@ -276,13 +273,13 @@ int icmp_tap_handler(const struct ctx *c, uint8_t pif, sa_family_t af,
> >  
> >  	pingf->ts = now->tv_sec;
> >  
> > -	if (sendto(pingf->sock, pkt, plen, MSG_NOSIGNAL, &sa.sa, sl) < 0) {
> > -		debug("%s: failed to relay request to socket: %s",
> > -		      pname, strerror(errno));
> > -	} else {
> > -		debug("%s: echo request to socket, ID: %"PRIu16", seq: %"PRIu16,
> > -		      pname, id, seq);
> > -	}
> > +	if (sendto(pingf->sock, pkt, plen, MSG_NOSIGNAL, &sa.sa, sl) < 0)
> > +		flow_dbg(pingf, "failed to relay request to socket: %s",
> > +			 strerror(errno));
> > +	else
> > +		flow_dbg(pingf,
> > +			 "echo request to socket, ID: %"PRIu16", seq: %"PRIu16,
> > +			 id, seq);
> 
> I can add the usual curly brackets on merge. The rest of the series
> looks good to me.

Thanks.  It may take me a minute to internalise the "braces if > 1
physical line" rule.

-- 
David Gibson			| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you.  NOT _the_ _other_
				| _way_ _around_!
http://www.ozlabs.org/~dgibson

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH 2/3] icmp: Flow based error reporting
  2024-03-08  0:49     ` David Gibson
@ 2024-03-08  6:06       ` Stefano Brivio
  0 siblings, 0 replies; 12+ messages in thread
From: Stefano Brivio @ 2024-03-08  6:06 UTC (permalink / raw)
  To: David Gibson; +Cc: passt-dev

On Fri, 8 Mar 2024 11:49:27 +1100
David Gibson <david@gibson.dropbear.id.au> wrote:

> On Fri, Mar 08, 2024 at 12:26:40AM +0100, Stefano Brivio wrote:
> > On Thu, 29 Feb 2024 15:15:33 +1100
> > David Gibson <david@gibson.dropbear.id.au> wrote:
> >   
> > > Use flow_dbg() and flow_err() helpers to generate flow-linked error
> > > messages in most places.  Make a few small improvements to the messages
> > > while we're at it.  This allows us to avoid the awkward 'pname' variables
> > > since whether we're dealing with ICMP or ICMPv6 is already built into the
> > > flow type which these helpers include.
> > > 
> > > Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
> > > ---
> > >  icmp.c | 31 ++++++++++++++-----------------
> > >  1 file changed, 14 insertions(+), 17 deletions(-)
> > > 
> > > diff --git a/icmp.c b/icmp.c
> > > index 1caf791d..63adafcf 100644
> > > --- a/icmp.c
> > > +++ b/icmp.c
> > > @@ -61,7 +61,6 @@ void icmp_sock_handler(const struct ctx *c, sa_family_t af, union epoll_ref ref)
> > >  {
> > >  	struct icmp_ping_flow *pingf = af == AF_INET
> > >  		? icmp_id_map[V4][ref.icmp.id] : icmp_id_map[V6][ref.icmp.id];
> > > -	const char *const pname = af == AF_INET ? "ICMP" : "ICMPv6";
> > >  	union sockaddr_inany sr;
> > >  	socklen_t sl = sizeof(sr);
> > >  	char buf[USHRT_MAX];
> > > @@ -75,8 +74,7 @@ void icmp_sock_handler(const struct ctx *c, sa_family_t af, union epoll_ref ref)
> > >  
> > >  	n = recvfrom(ref.fd, buf, sizeof(buf), 0, &sr.sa, &sl);
> > >  	if (n < 0) {
> > > -		warn("%s: recvfrom() error on ping socket: %s",
> > > -		     pname, strerror(errno));
> > > +		flow_err(pingf, "recvfrom() error: %s", strerror(errno));
> > >  		return;
> > >  	}
> > >  	if (sr.sa_family != af)
> > > @@ -113,8 +111,9 @@ void icmp_sock_handler(const struct ctx *c, sa_family_t af, union epoll_ref ref)
> > >  		pingf->seq = seq;
> > >  	}
> > >  
> > > -	debug("%s: echo reply to tap, ID: %"PRIu16", seq: %"PRIu16, pname,
> > > -	      ref.icmp.id, seq);
> > > +	flow_dbg(pingf, "echo reply to tap, ID: %"PRIu16", seq: %"PRIu16,
> > > +		 ref.icmp.id, seq);
> > > +
> > >  	if (af == AF_INET)
> > >  		tap_icmp4_send(c, sr.sa4.sin_addr, tap_ip4_daddr(c), buf, n);
> > >  	else if (af == AF_INET6)
> > > @@ -123,7 +122,7 @@ void icmp_sock_handler(const struct ctx *c, sa_family_t af, union epoll_ref ref)
> > >  	return;
> > >  
> > >  unexpected:
> > > -	warn("%s: Unexpected packet on ping socket", pname);
> > > +	flow_err(pingf, "Unexpected packet on ping socket");
> > >  }
> > >  
> > >  /**
> > > @@ -158,7 +157,6 @@ static struct icmp_ping_flow *icmp_ping_new(const struct ctx *c,
> > >  					    struct icmp_ping_flow **id_sock,
> > >  					    sa_family_t af, uint16_t id)
> > >  {
> > > -	const char *const pname = af == AF_INET ? "ICMP" : "ICMPv6";
> > >  	uint8_t flowtype = af == AF_INET ? FLOW_PING4 : FLOW_PING6;
> > >  	union icmp_epoll_ref iref = { .id = id };
> > >  	union flow *flow = flow_alloc();
> > > @@ -195,9 +193,9 @@ static struct icmp_ping_flow *icmp_ping_new(const struct ctx *c,
> > >  	if (pingf->sock > FD_REF_MAX)
> > >  		goto cancel;
> > >  
> > > -	*id_sock = pingf;
> > > +	flow_dbg(pingf, "new socket %i for echo ID %"PRIu16, pingf->sock, id);
> > >  
> > > -	debug("%s: new socket %i for echo ID %"PRIu16, pname, pingf->sock, id);
> > > +	*id_sock = pingf;
> > >  
> > >  	return pingf;
> > >  
> > > @@ -222,7 +220,6 @@ int icmp_tap_handler(const struct ctx *c, uint8_t pif, sa_family_t af,
> > >  		     const void *saddr, const void *daddr,
> > >  		     const struct pool *p, const struct timespec *now)
> > >  {
> > > -	const char *const pname = af == AF_INET ? "ICMP" : "ICMPv6";
> > >  	union sockaddr_inany sa = { .sa_family = af };
> > >  	const socklen_t sl = af == AF_INET ? sizeof(sa.sa4) : sizeof(sa.sa6);
> > >  	struct icmp_ping_flow *pingf, **id_sock;
> > > @@ -276,13 +273,13 @@ int icmp_tap_handler(const struct ctx *c, uint8_t pif, sa_family_t af,
> > >  
> > >  	pingf->ts = now->tv_sec;
> > >  
> > > -	if (sendto(pingf->sock, pkt, plen, MSG_NOSIGNAL, &sa.sa, sl) < 0) {
> > > -		debug("%s: failed to relay request to socket: %s",
> > > -		      pname, strerror(errno));
> > > -	} else {
> > > -		debug("%s: echo request to socket, ID: %"PRIu16", seq: %"PRIu16,
> > > -		      pname, id, seq);
> > > -	}
> > > +	if (sendto(pingf->sock, pkt, plen, MSG_NOSIGNAL, &sa.sa, sl) < 0)
> > > +		flow_dbg(pingf, "failed to relay request to socket: %s",
> > > +			 strerror(errno));
> > > +	else
> > > +		flow_dbg(pingf,
> > > +			 "echo request to socket, ID: %"PRIu16", seq: %"PRIu16,
> > > +			 id, seq);  
> > 
> > I can add the usual curly brackets on merge. The rest of the series
> > looks good to me.  
> 
> Thanks.  It may take me a minute to internalise the "braces if > 1
> physical line" rule.

It took me years. I mean, we don't have to adhere to "Linux -net"
style, but it has a couple of advantages such as being somewhat
formalised and the existence of checkpatch.pl. I also happen to be
familiar with it.

If we decide not to, though, then we should probably fix up everything
at once.

-- 
Stefano


^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH 0/3] Basic integration of ICMP with flow table
  2024-02-29  4:15 [PATCH 0/3] Basic integration of ICMP with flow table David Gibson
                   ` (2 preceding siblings ...)
  2024-02-29  4:15 ` [PATCH 3/3] icmp: Use 'flowside' epoll references for ping sockets David Gibson
@ 2024-03-13 10:08 ` Stefano Brivio
  3 siblings, 0 replies; 12+ messages in thread
From: Stefano Brivio @ 2024-03-13 10:08 UTC (permalink / raw)
  To: David Gibson; +Cc: passt-dev

On Thu, 29 Feb 2024 15:15:31 +1100
David Gibson <david@gibson.dropbear.id.au> wrote:

> I realized that even with the rudimentary form of the flow table we
> have now, it's possible to integrate ICMP (ping).  This even
> simplifies a little bit of logic in the ICMP code.
> 
> This is based on the address handling clean up series.
> 
> David Gibson (3):
>   icmp: Store ping socket information in flow table
>   icmp: Flow based error reporting
>   icmp: Use 'flowside' epoll references for ping sockets

Applied.

-- 
Stefano


^ permalink raw reply	[flat|nested] 12+ messages in thread

end of thread, other threads:[~2024-03-13 10:09 UTC | newest]

Thread overview: 12+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2024-02-29  4:15 [PATCH 0/3] Basic integration of ICMP with flow table David Gibson
2024-02-29  4:15 ` [PATCH 1/3] icmp: Store ping socket information in " David Gibson
2024-03-07  8:53   ` Stefano Brivio
2024-03-07 10:24     ` David Gibson
2024-03-07 23:25       ` Stefano Brivio
2024-03-08  0:48         ` David Gibson
2024-02-29  4:15 ` [PATCH 2/3] icmp: Flow based error reporting David Gibson
2024-03-07 23:26   ` Stefano Brivio
2024-03-08  0:49     ` David Gibson
2024-03-08  6:06       ` Stefano Brivio
2024-02-29  4:15 ` [PATCH 3/3] icmp: Use 'flowside' epoll references for ping sockets David Gibson
2024-03-13 10:08 ` [PATCH 0/3] Basic integration of ICMP with flow table Stefano Brivio

Code repositories for project(s) associated with this public inbox

	https://passt.top/passt

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for IMAP folder(s).