From mboxrd@z Thu Jan  1 00:00:00 1970
Received: from gandalf.ozlabs.org (gandalf.ozlabs.org [150.107.74.76])
	by passt.top (Postfix) with ESMTPS id D79545A0272
	for <passt-dev@passt.top>; Thu,  3 Aug 2023 09:20:08 +0200 (CEST)
DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed;
	d=gibson.dropbear.id.au; s=201602; t=1691047199;
	bh=29wxDuETaztTtfAlwnfj9xoGTEWc5ECMmVTk6rzdKZ0=;
	h=From:To:Cc:Subject:Date:In-Reply-To:References:From;
	b=pX/qrgS0lwRU1x9zUi1TgDIUSrrEOFSNuFnda5ZObvhXEdUoucPh4TcB8FpDk+TUC
	 AVzhwNFwf+E/UQe+0cwjcs+IyDruXm/pEb8hDyTisRUxtTj7X2RNHYXbd17nG7y5iV
	 YVhENOoATvghyGUq12i05QbSLwZWFVvuV1obTJho=
Received: by gandalf.ozlabs.org (Postfix, from userid 1007)
	id 4RGgGR6dYtz4wyP; Thu,  3 Aug 2023 17:19:59 +1000 (AEST)
From: David Gibson <david@gibson.dropbear.id.au>
To: passt-dev@passt.top,
	Stefano Brivio <sbrivio@redhat.com>
Subject: [PATCH v2 12/17] netlink: Split nl_req() to allow processing multiple response datagrams
Date: Thu,  3 Aug 2023 17:19:51 +1000
Message-ID: <20230803071956.3198452-13-david@gibson.dropbear.id.au>
X-Mailer: git-send-email 2.41.0
In-Reply-To: <20230803071956.3198452-1-david@gibson.dropbear.id.au>
References: <20230803071956.3198452-1-david@gibson.dropbear.id.au>
MIME-Version: 1.0
Content-Transfer-Encoding: 8bit
Message-ID-Hash: NC24N6KX2UJDVBDRMAVYOCINPOLNXUVV
X-Message-ID-Hash: NC24N6KX2UJDVBDRMAVYOCINPOLNXUVV
X-MailFrom: dgibson@gandalf.ozlabs.org
X-Mailman-Rule-Misses: dmarc-mitigation; no-senders; approved; emergency; loop; banned-address; member-moderation; nonmember-moderation; administrivia; implicit-dest; max-recipients; max-size; news-moderation; no-subject; digests; suspicious-header
CC: David Gibson <david@gibson.dropbear.id.au>
X-Mailman-Version: 3.3.8
Precedence: list
List-Id: Development discussion and patches for passt <passt-dev.passt.top>
Archived-At: <https://archives.passt.top/passt-dev/20230803071956.3198452-13-david@gibson.dropbear.id.au/>
Archived-At: <https://passt.top/hyperkitty/list/passt-dev@passt.top/message/NC24N6KX2UJDVBDRMAVYOCINPOLNXUVV/>
List-Archive: <https://archives.passt.top/passt-dev/>
List-Archive: <https://passt.top/hyperkitty/list/passt-dev@passt.top/>
List-Help: <mailto:passt-dev-request@passt.top?subject=help>
List-Owner: <mailto:passt-dev-owner@passt.top>
List-Post: <mailto:passt-dev@passt.top>
List-Subscribe: <mailto:passt-dev-join@passt.top>
List-Unsubscribe: <mailto:passt-dev-leave@passt.top>

Currently nl_req() sends the request, and receives a single response
datagram which we then process.  However, a single request can result in
multiple response datagrams.  That happens nearly all the time for DUMP
requests, where the 'DONE' message usually comes in a second datagram after
the NEW{LINK|ADDR|ROUTE} messages.  It can also happen if there are just
too many objects to dump in a single datagram.

Allow our netlink code to process multiple response datagrams by splitting
nl_req() into three different helpers: nl_send() just sends a request,
without getting a response.  nl_status() checks a single message to see if
it indicates the end of the reponses for our request.  nl_next() moves onto
the next response message, whether it's in a datagram we already received
or we need to recv() a new one.  We also add a 'for'-style macro to use
these to step through every response message to a request across multiple
datagrams.

While we're at it, be more thourough with checking that our sequence
numbers are in sync.

Link: https://bugs.passt.top/show_bug.cgi?id=67

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
---
 netlink.c | 181 ++++++++++++++++++++++++++++++++++--------------------
 1 file changed, 113 insertions(+), 68 deletions(-)

diff --git a/netlink.c b/netlink.c
index d5c9439..539a223 100644
--- a/netlink.c
+++ b/netlink.c
@@ -104,18 +104,17 @@ fail:
 }
 
 /**
- * nl_req() - Prepare and send netlink request, read response
+ * nl_send() - Prepare and send netlink request
  * @s:		Netlink socket
- * @buf:	Buffer for response (at least NLBUFSIZ long)
  * @req:	Request (will fill netlink header)
  * @type:	Request type
  * @flags:	Extra request flags (NLM_F_REQUEST and NLM_F_ACK assumed)
  * @len:	Request length
  *
- * Return: received length on success, terminates on error
+ * Return: sequence number of request on success, terminates on error
  */
-static ssize_t nl_req(int s, char *buf, void *req,
-		      uint16_t type, uint16_t flags, ssize_t len)
+static uint16_t nl_send(int s, void *req, uint16_t type,
+		       uint16_t flags, ssize_t len)
 {
 	char flush[NLBUFSIZ];
 	struct nlmsghdr *nh;
@@ -148,13 +147,80 @@ static ssize_t nl_req(int s, char *buf, void *req,
 	else if (n < len)
 		die("netlink: Short send (%lu of %lu bytes)", n, len);
 
-	n = recv(s, buf, NLBUFSIZ, 0);
-	if (n < 0)
-		die("netlink: Failed to recv(): %s", strerror(errno));
+	return nh->nlmsg_seq;
+}
+
+/**
+ * nl_status() - Check status given by a netlink response
+ * @nh:		Netlink response header
+ * @n:		Remaining space in response buffer from @nh
+ * @seq:	Request sequence number we expect a response to
+ *
+ * Return: 0 if @nh indicated successful completion,
+ *         < 0, negative error code if @nh indicated failure
+ *         > 0 @n if there are more responses to request @seq
+ *     terminates if sequence numbers are out of sync
+ */
+static int nl_status(const struct nlmsghdr *nh, ssize_t n, uint16_t seq)
+{
+	ASSERT(NLMSG_OK(nh, n));
+
+	if (nh->nlmsg_seq != seq)
+		die("netlink: Unexpected sequence number (%hu != %hu)",
+		    nh->nlmsg_seq, seq);
+
+	if (nh->nlmsg_type == NLMSG_DONE) {
+		return 0;
+	}
+	if (nh->nlmsg_type == NLMSG_ERROR) {
+		struct nlmsgerr *errmsg = (struct nlmsgerr *)NLMSG_DATA(nh);
+		return errmsg->error;
+	}
 
 	return n;
 }
 
+/**
+ * nl_next() - Get next netlink response message, recv()ing if necessary
+ * @s:		Netlink socket
+ * @buf:	Buffer for responses (at least NLBUFSIZ long)
+ * @nh:		Previous message, or NULL if there are none
+ * @n:		Variable with remaining unread bytes in buffer (updated)
+ *
+ * Return: pointer to next unread netlink response message (may block)
+ */
+static struct nlmsghdr *nl_next(int s, char *buf, struct nlmsghdr *nh, ssize_t *n)
+{
+	if (nh) {
+		nh = NLMSG_NEXT(nh, *n);
+		if (NLMSG_OK(nh, *n))
+			return nh;
+	}
+
+	*n = recv(s, buf, NLBUFSIZ, 0);
+	if (*n < 0)
+		die("netlink: Failed to recv(): %s", strerror(errno));
+
+	nh = (struct nlmsghdr *)buf;
+	if (!NLMSG_OK(nh, *n))
+		die("netlink: Response datagram with no message");
+
+	return nh;
+}
+
+/**
+ * nl_foreach - 'for' type macro to step through netlink response messages
+ * @nh:		Steps through each response header (struct nlmsghdr *)
+ * @status:	When loop exits indicates if there was an error (ssize_t)
+ * @s:		Netlink socket
+ * @buf:	Buffer for responses (at least NLBUFSIZ long)
+ * @seq:	Sequence number of request we're getting responses for
+  */
+#define nl_foreach(nh, status, s, buf, seq)				\
+	for ((nh) = nl_next((s), (buf), NULL, &(status));		\
+	     ((status) = nl_status((nh), (status), (seq))) > 0;		\
+	     (nh) = nl_next((s), (buf), (nh), &(status)))
+
 /**
  * nl_do() - Send netlink "do" request, and wait for acknowledgement
  * @s:		Netlink socket
@@ -169,31 +235,14 @@ static int nl_do(int s, void *req, uint16_t type, uint16_t flags, ssize_t len)
 {
 	struct nlmsghdr *nh;
 	char buf[NLBUFSIZ];
+	ssize_t status;
 	uint16_t seq;
-	ssize_t n;
-
-	n = nl_req(s, buf, req, type, flags, len);
-	seq = ((struct nlmsghdr *)req)->nlmsg_seq;
 
-	for (nh = (struct nlmsghdr *)buf;
-	     NLMSG_OK(nh, n); nh = NLMSG_NEXT(nh, n)) {
-		struct nlmsgerr *errmsg;
+	seq = nl_send(s, req, type, flags, len);
+	nl_foreach(nh, status, s, buf, seq)
+		warn("netlink: Unexpected response message");
 
-		if (nh->nlmsg_seq != seq)
-			die("netlink: Unexpected response sequence number");
-
-		switch (nh->nlmsg_type) {
-		case NLMSG_DONE:
-			return 0;
-		case NLMSG_ERROR:
-			errmsg = (struct nlmsgerr *)NLMSG_DATA(nh);
-			return errmsg->error;
-		default:
-			warn("netlink: Unexpected response message");
-		}
-	}
-
-	die("netlink: Missing acknowledgement of request");
+	return status;
 }
 
 /**
@@ -215,14 +264,12 @@ unsigned int nl_get_ext_if(int s, sa_family_t af)
 	struct nlmsghdr *nh;
 	struct rtattr *rta;
 	char buf[NLBUFSIZ];
-	ssize_t n;
+	ssize_t status;
+	uint16_t seq;
 	size_t na;
 
-	n = nl_req(s, buf, &req, RTM_GETROUTE, NLM_F_DUMP, sizeof(req));
-
-	nh = (struct nlmsghdr *)buf;
-
-	for ( ; NLMSG_OK(nh, n); nh = NLMSG_NEXT(nh, n)) {
+	seq = nl_send(s, &req, RTM_GETROUTE, NLM_F_DUMP, sizeof(req));
+	nl_foreach(nh, status, s, buf, seq) {
 		struct rtmsg *rtm = (struct rtmsg *)NLMSG_DATA(nh);
 
 		if (rtm->rtm_dst_len || rtm->rtm_family != af)
@@ -270,13 +317,11 @@ void nl_route_get_def(int s, unsigned int ifi, sa_family_t af, void *gw)
 	};
 	struct nlmsghdr *nh;
 	char buf[NLBUFSIZ];
-	ssize_t n;
-
-	n = nl_req(s, buf, &req, RTM_GETROUTE, NLM_F_DUMP, sizeof(req));
+	ssize_t status;
+	uint16_t seq;
 
-	for (nh = (struct nlmsghdr *)buf;
-	     NLMSG_OK(nh, n) && nh->nlmsg_type != NLMSG_DONE;
-	     nh = NLMSG_NEXT(nh, n)) {
+	seq = nl_send(s, &req, RTM_GETROUTE, NLM_F_DUMP, sizeof(req));
+	nl_foreach(nh, status, s, buf, seq) {
 		struct rtmsg *rtm = (struct rtmsg *)NLMSG_DATA(nh);
 		struct rtattr *rta;
 		size_t na;
@@ -392,18 +437,23 @@ void nl_route_dup(int s_src, unsigned int ifi_src,
 		.rta.rta_len	  = RTA_LENGTH(sizeof(unsigned int)),
 		.ifi		  = ifi_src,
 	};
+	ssize_t nlmsgs_size, status;
 	unsigned dup_routes = 0;
-	ssize_t n, nlmsgs_size;
 	struct nlmsghdr *nh;
 	char buf[NLBUFSIZ];
+	uint16_t seq;
 	unsigned i;
 
-	nlmsgs_size = nl_req(s_src, buf, &req,
-			     RTM_GETROUTE, NLM_F_DUMP, sizeof(req));
+	seq = nl_send(s_src, &req, RTM_GETROUTE, NLM_F_DUMP, sizeof(req));
 
-	for (nh = (struct nlmsghdr *)buf, n = nlmsgs_size;
-	     NLMSG_OK(nh, n) && nh->nlmsg_type != NLMSG_DONE;
-	     nh = NLMSG_NEXT(nh, n)) {
+	/* nl_foreach() will step through multiple response datagrams,
+	 * which we don't want here because we need to have all the
+	 * routes in the buffer at once.
+	 */
+	nh = nl_next(s_src, buf, NULL, &nlmsgs_size);
+	for (status = nlmsgs_size;
+	     NLMSG_OK(nh, status) && (status = nl_status(nh, status, seq)) > 0;
+	     nh = NLMSG_NEXT(nh, status)) {
 		struct rtmsg *rtm = (struct rtmsg *)NLMSG_DATA(nh);
 		struct rtattr *rta;
 		size_t na;
@@ -428,9 +478,9 @@ void nl_route_dup(int s_src, unsigned int ifi_src,
 	 * to calculate dependencies: let the kernel do that.
 	 */
 	for (i = 0; i < dup_routes; i++) {
-		for (nh = (struct nlmsghdr *)buf, n = nlmsgs_size;
-		     NLMSG_OK(nh, n) && nh->nlmsg_type != NLMSG_DONE;
-		     nh = NLMSG_NEXT(nh, n)) {
+		for (nh = (struct nlmsghdr *)buf, status = nlmsgs_size;
+		     NLMSG_OK(nh, status);
+		     nh = NLMSG_NEXT(nh, status)) {
 			uint16_t flags = nh->nlmsg_flags;
 
 			if (nh->nlmsg_type != RTM_NEWROUTE)
@@ -464,13 +514,11 @@ void nl_addr_get(int s, unsigned int ifi, sa_family_t af,
 	};
 	struct nlmsghdr *nh;
 	char buf[NLBUFSIZ];
-	ssize_t n;
-
-	n = nl_req(s, buf, &req, RTM_GETADDR, NLM_F_DUMP, sizeof(req));
+	ssize_t status;
+	uint16_t seq;
 
-	for (nh = (struct nlmsghdr *)buf;
-	     NLMSG_OK(nh, n) && nh->nlmsg_type != NLMSG_DONE;
-	     nh = NLMSG_NEXT(nh, n)) {
+	seq = nl_send(s, &req, RTM_GETADDR, NLM_F_DUMP, sizeof(req));
+	nl_foreach(nh, status, s, buf, seq) {
 		struct ifaddrmsg *ifa = (struct ifaddrmsg *)NLMSG_DATA(nh);
 		struct rtattr *rta;
 		size_t na;
@@ -587,13 +635,11 @@ void nl_addr_dup(int s_src, unsigned int ifi_src,
 	};
 	char buf[NLBUFSIZ];
 	struct nlmsghdr *nh;
-	ssize_t n;
-
-	n = nl_req(s_src, buf, &req, RTM_GETADDR, NLM_F_DUMP, sizeof(req));
+	ssize_t status;
+	uint16_t seq;
 
-	for (nh = (struct nlmsghdr *)buf;
-	     NLMSG_OK(nh, n) && nh->nlmsg_type != NLMSG_DONE;
-	     nh = NLMSG_NEXT(nh, n)) {
+	seq = nl_send(s_src, &req, RTM_GETADDR, NLM_F_DUMP, sizeof(req));
+	nl_foreach(nh, status, s_src, buf, seq) {
 		struct ifaddrmsg *ifa;
 		struct rtattr *rta;
 		size_t na;
@@ -638,12 +684,11 @@ void nl_link_get_mac(int s, unsigned int ifi, void *mac)
 	};
 	struct nlmsghdr *nh;
 	char buf[NLBUFSIZ];
-	ssize_t n;
+	ssize_t status;
+	uint16_t seq;
 
-	n = nl_req(s, buf, &req, RTM_GETLINK, 0, sizeof(req));
-	for (nh = (struct nlmsghdr *)buf;
-	     NLMSG_OK(nh, n) && nh->nlmsg_type != NLMSG_DONE;
-	     nh = NLMSG_NEXT(nh, n)) {
+	seq = nl_send(s, &req, RTM_GETLINK, 0, sizeof(req));
+	nl_foreach(nh, status, s, buf, seq) {
 		struct ifinfomsg *ifm = (struct ifinfomsg *)NLMSG_DATA(nh);
 		struct rtattr *rta;
 		size_t na;
-- 
2.41.0