public inbox for passt-dev@passt.top
 help / color / mirror / code / Atom feed
From: David Gibson <david@gibson.dropbear.id.au>
To: Stefano Brivio <sbrivio@redhat.com>, passt-dev@passt.top
Cc: David Gibson <david@gibson.dropbear.id.au>
Subject: [PATCH 06/17] netlink: Make nl_*_dup() use a separate datagram for each request
Date: Mon, 24 Jul 2023 16:09:25 +1000	[thread overview]
Message-ID: <20230724060936.952659-7-david@gibson.dropbear.id.au> (raw)
In-Reply-To: <20230724060936.952659-1-david@gibson.dropbear.id.au>

nl_req() is designed to handle a single netlink request message: it only
receives a single reply datagram for the request, and only waits for a
single NLMSG_DONE or NLMSG_ERROR message at the beginning to clear out
things from previous requests.

However, in both nl_addr_dup() and nl_route_dup() we can send multiple
request messages as a single datagram, with a single nl_req() call.
This can easily mean that the replies nl_req() collects get out of
sync with requests.  We only get away with this because after we call
these functions we don't make any netlink calls where we need to parse
the replies.

This is fragile, so alter nl_*_dup() to make a separate nl_req() call for
each address or route it is adding in the target namespace.

For nl_route_dup() this fixes an additional minor problem: because
routes can have dependencies, some of the route add requests might
fail on the first attempt, so we need to repeat the requests a number
of times.  When we did that, we weren't updating the sequence number
on each new attempt.  This works, but not updating the sequence number
for each new request isn't ideal.  Now that we're making the requests
one at a time, it's easier to make sure we update the sequence number
each time.

Link: https://bugs.passt.top/show_bug.cgi?id=67

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
---
 netlink.c | 50 +++++++++++++++++++++++++++-----------------------
 1 file changed, 27 insertions(+), 23 deletions(-)

diff --git a/netlink.c b/netlink.c
index 72044cd..bd76098 100644
--- a/netlink.c
+++ b/netlink.c
@@ -351,18 +351,16 @@ void nl_route_dup(int s_src, unsigned int ifi_src,
 		.rta.rta_len	  = RTA_LENGTH(sizeof(unsigned int)),
 		.ifi		  = ifi_src,
 	};
-	char buf[NLBUFSIZ], resp[NLBUFSIZ];
 	unsigned dup_routes = 0;
 	ssize_t n, nlmsgs_size;
 	struct nlmsghdr *nh;
+	char buf[NLBUFSIZ];
 	unsigned i;
 
-	if ((n = nl_req(s_src, buf, &req, req.nlh.nlmsg_len)) < 0)
+	if ((nlmsgs_size = nl_req(s_src, buf, &req, req.nlh.nlmsg_len)) < 0)
 		return;
 
-	nlmsgs_size = n;
-
-	for (nh = (struct nlmsghdr *)buf;
+	for (nh = (struct nlmsghdr *)buf, n = nlmsgs_size;
 	     NLMSG_OK(nh, n) && nh->nlmsg_type != NLMSG_DONE;
 	     nh = NLMSG_NEXT(nh, n)) {
 		struct rtmsg *rtm = (struct rtmsg *)NLMSG_DATA(nh);
@@ -372,7 +370,6 @@ void nl_route_dup(int s_src, unsigned int ifi_src,
 		if (nh->nlmsg_type != RTM_NEWROUTE)
 			continue;
 
-		nh->nlmsg_seq = nl_seq++;
 		nh->nlmsg_pid = 0;
 		nh->nlmsg_flags &= ~NLM_F_DUMP_FILTERED;
 		nh->nlmsg_flags |= NLM_F_REQUEST | NLM_F_ACK |
@@ -386,17 +383,27 @@ void nl_route_dup(int s_src, unsigned int ifi_src,
 		}
 	}
 
-	nh = (struct nlmsghdr *)buf;
 	/* Routes might have dependencies between each other, and the
 	 * kernel processes RTM_NEWROUTE messages sequentially. For n
-	 * valid routes, we might need to send up to n requests to get
-	 * all of them inserted. Routes that have been already
-	 * inserted won't cause the whole request to fail, so we can
-	 * simply repeat the whole request. This approach avoids the
-	 * need to calculate dependencies: let the kernel do that.
+	 * routes, we might need to send the requests up to n times to
+	 * get all of them inserted. Routes that have been already
+	 * inserted will return -EEXIST, but we can safely ignore that
+	 * and repeat the requests. This avoids the need to calculate
+	 * dependencies: let the kernel do that.
 	 */
-	for (i = 0; i < dup_routes; i++)
-		nl_req(s_dst, resp, nh, nlmsgs_size);
+	for (i = 0; i < dup_routes; i++) {
+		for (nh = (struct nlmsghdr *)buf, n = nlmsgs_size;
+		     NLMSG_OK(nh, n) && nh->nlmsg_type != NLMSG_DONE;
+		     nh = NLMSG_NEXT(nh, n)) {
+			char resp[NLBUFSIZ];
+
+			if (nh->nlmsg_type != RTM_NEWROUTE)
+				continue;
+
+			nh->nlmsg_seq = nl_seq++;
+			nl_req(s_dst, resp, nh, nh->nlmsg_len);
+		}
+	}
 }
 
 /**
@@ -560,19 +567,18 @@ void nl_addr_dup(int s_src, unsigned int ifi_src,
 		.ifa.ifa_index     = ifi_src,
 		.ifa.ifa_prefixlen = 0,
 	};
-	char buf[NLBUFSIZ], resp[NLBUFSIZ];
-	ssize_t n, nlmsgs_size;
+	char buf[NLBUFSIZ];
 	struct nlmsghdr *nh;
+	ssize_t n;
 
 	if ((n = nl_req(s_src, buf, &req, sizeof(req))) < 0)
 		return;
 
-	nlmsgs_size = n;
-
 	for (nh = (struct nlmsghdr *)buf;
 	     NLMSG_OK(nh, n) && nh->nlmsg_type != NLMSG_DONE;
 	     nh = NLMSG_NEXT(nh, n)) {
 		struct ifaddrmsg *ifa;
+		char resp[NLBUFSIZ];
 		struct rtattr *rta;
 		size_t na;
 
@@ -587,10 +593,8 @@ void nl_addr_dup(int s_src, unsigned int ifi_src,
 		ifa = (struct ifaddrmsg *)NLMSG_DATA(nh);
 
 		if (ifa->ifa_scope == RT_SCOPE_LINK ||
-		    ifa->ifa_index != ifi_src) {
-			ifa->ifa_family = AF_UNSPEC;
+		    ifa->ifa_index != ifi_src)
 			continue;
-		}
 
 		ifa->ifa_index = ifi_dst;
 
@@ -599,9 +603,9 @@ void nl_addr_dup(int s_src, unsigned int ifi_src,
 			if (rta->rta_type == IFA_LABEL)
 				rta->rta_type = IFA_UNSPEC;
 		}
-	}
 
-	nl_req(s_dst, resp, buf, nlmsgs_size);
+		nl_req(s_dst, resp, nh, nh->nlmsg_len);
+	}
 }
 
 /**
-- 
@@ -351,18 +351,16 @@ void nl_route_dup(int s_src, unsigned int ifi_src,
 		.rta.rta_len	  = RTA_LENGTH(sizeof(unsigned int)),
 		.ifi		  = ifi_src,
 	};
-	char buf[NLBUFSIZ], resp[NLBUFSIZ];
 	unsigned dup_routes = 0;
 	ssize_t n, nlmsgs_size;
 	struct nlmsghdr *nh;
+	char buf[NLBUFSIZ];
 	unsigned i;
 
-	if ((n = nl_req(s_src, buf, &req, req.nlh.nlmsg_len)) < 0)
+	if ((nlmsgs_size = nl_req(s_src, buf, &req, req.nlh.nlmsg_len)) < 0)
 		return;
 
-	nlmsgs_size = n;
-
-	for (nh = (struct nlmsghdr *)buf;
+	for (nh = (struct nlmsghdr *)buf, n = nlmsgs_size;
 	     NLMSG_OK(nh, n) && nh->nlmsg_type != NLMSG_DONE;
 	     nh = NLMSG_NEXT(nh, n)) {
 		struct rtmsg *rtm = (struct rtmsg *)NLMSG_DATA(nh);
@@ -372,7 +370,6 @@ void nl_route_dup(int s_src, unsigned int ifi_src,
 		if (nh->nlmsg_type != RTM_NEWROUTE)
 			continue;
 
-		nh->nlmsg_seq = nl_seq++;
 		nh->nlmsg_pid = 0;
 		nh->nlmsg_flags &= ~NLM_F_DUMP_FILTERED;
 		nh->nlmsg_flags |= NLM_F_REQUEST | NLM_F_ACK |
@@ -386,17 +383,27 @@ void nl_route_dup(int s_src, unsigned int ifi_src,
 		}
 	}
 
-	nh = (struct nlmsghdr *)buf;
 	/* Routes might have dependencies between each other, and the
 	 * kernel processes RTM_NEWROUTE messages sequentially. For n
-	 * valid routes, we might need to send up to n requests to get
-	 * all of them inserted. Routes that have been already
-	 * inserted won't cause the whole request to fail, so we can
-	 * simply repeat the whole request. This approach avoids the
-	 * need to calculate dependencies: let the kernel do that.
+	 * routes, we might need to send the requests up to n times to
+	 * get all of them inserted. Routes that have been already
+	 * inserted will return -EEXIST, but we can safely ignore that
+	 * and repeat the requests. This avoids the need to calculate
+	 * dependencies: let the kernel do that.
 	 */
-	for (i = 0; i < dup_routes; i++)
-		nl_req(s_dst, resp, nh, nlmsgs_size);
+	for (i = 0; i < dup_routes; i++) {
+		for (nh = (struct nlmsghdr *)buf, n = nlmsgs_size;
+		     NLMSG_OK(nh, n) && nh->nlmsg_type != NLMSG_DONE;
+		     nh = NLMSG_NEXT(nh, n)) {
+			char resp[NLBUFSIZ];
+
+			if (nh->nlmsg_type != RTM_NEWROUTE)
+				continue;
+
+			nh->nlmsg_seq = nl_seq++;
+			nl_req(s_dst, resp, nh, nh->nlmsg_len);
+		}
+	}
 }
 
 /**
@@ -560,19 +567,18 @@ void nl_addr_dup(int s_src, unsigned int ifi_src,
 		.ifa.ifa_index     = ifi_src,
 		.ifa.ifa_prefixlen = 0,
 	};
-	char buf[NLBUFSIZ], resp[NLBUFSIZ];
-	ssize_t n, nlmsgs_size;
+	char buf[NLBUFSIZ];
 	struct nlmsghdr *nh;
+	ssize_t n;
 
 	if ((n = nl_req(s_src, buf, &req, sizeof(req))) < 0)
 		return;
 
-	nlmsgs_size = n;
-
 	for (nh = (struct nlmsghdr *)buf;
 	     NLMSG_OK(nh, n) && nh->nlmsg_type != NLMSG_DONE;
 	     nh = NLMSG_NEXT(nh, n)) {
 		struct ifaddrmsg *ifa;
+		char resp[NLBUFSIZ];
 		struct rtattr *rta;
 		size_t na;
 
@@ -587,10 +593,8 @@ void nl_addr_dup(int s_src, unsigned int ifi_src,
 		ifa = (struct ifaddrmsg *)NLMSG_DATA(nh);
 
 		if (ifa->ifa_scope == RT_SCOPE_LINK ||
-		    ifa->ifa_index != ifi_src) {
-			ifa->ifa_family = AF_UNSPEC;
+		    ifa->ifa_index != ifi_src)
 			continue;
-		}
 
 		ifa->ifa_index = ifi_dst;
 
@@ -599,9 +603,9 @@ void nl_addr_dup(int s_src, unsigned int ifi_src,
 			if (rta->rta_type == IFA_LABEL)
 				rta->rta_type = IFA_UNSPEC;
 		}
-	}
 
-	nl_req(s_dst, resp, buf, nlmsgs_size);
+		nl_req(s_dst, resp, nh, nh->nlmsg_len);
+	}
 }
 
 /**
-- 
2.41.0


  parent reply	other threads:[~2023-07-24  6:09 UTC|newest]

Thread overview: 35+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-07-24  6:09 [PATCH 00/17] netlink fixes and cleanups David Gibson
2023-07-24  6:09 ` [PATCH 01/17] netlink: Split up functionality of nl_link() David Gibson
2023-08-02 22:47   ` Stefano Brivio
2023-08-03  2:09     ` David Gibson
2023-08-03  4:29       ` David Gibson
2023-08-03  5:39         ` David Gibson
2023-08-03  5:40         ` Stefano Brivio
2023-07-24  6:09 ` [PATCH 02/17] netlink: Split nl_addr() into separate operation functions David Gibson
2023-08-02 22:47   ` Stefano Brivio
2023-08-03  2:11     ` David Gibson
2023-07-24  6:09 ` [PATCH 03/17] netlink: Split nl_route() " David Gibson
2023-08-02 22:47   ` Stefano Brivio
2023-08-03  2:18     ` David Gibson
2023-07-24  6:09 ` [PATCH 04/17] netlink: Use struct in_addr for IPv4 addresses, not bare uint32_t David Gibson
2023-07-24  6:09 ` [PATCH 05/17] netlink: Explicitly pass netlink sockets to operations David Gibson
2023-07-24  6:09 ` David Gibson [this message]
2023-07-24  6:09 ` [PATCH 07/17] netlink: Start sequence number from 1 instead of 0 David Gibson
2023-07-24  6:09 ` [PATCH 08/17] netlink: Treat send() or recv() errors as fatal David Gibson
2023-08-02 22:47   ` Stefano Brivio
2023-08-03  2:19     ` David Gibson
2023-07-24  6:09 ` [PATCH 09/17] netlink: Fill in netlink header fields from nl_req() David Gibson
2023-07-24  6:09 ` [PATCH 10/17] netlink: Add nl_do() helper for simple operations with error checking David Gibson
2023-08-02 22:48   ` Stefano Brivio
2023-08-03  2:24     ` David Gibson
2023-07-24  6:09 ` [PATCH 11/17] netlink: Clearer reasoning about the netlink response buffer size David Gibson
2023-08-02 22:48   ` Stefano Brivio
2023-08-03  2:22     ` David Gibson
2023-07-24  6:09 ` [PATCH 12/17] netlink: Split nl_req() to allow processing multiple response datagrams David Gibson
2023-07-24  6:09 ` [PATCH 13/17] netlink: Add nl_foreach_oftype to filter response message types David Gibson
2023-07-24  6:09 ` [PATCH 14/17] netlink: Propagate errors for "set" operations David Gibson
2023-07-24  6:09 ` [PATCH 15/17] netlink: Always process all responses to a netlink request David Gibson
2023-07-24  6:09 ` [PATCH 16/17] netlink: Propagate errors for "dump" operations David Gibson
2023-07-24  6:09 ` [PATCH 17/17] netlink: Propagate errors for "dup" operations David Gibson
2023-08-02 22:48   ` Stefano Brivio
2023-08-03  2:26     ` David Gibson

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20230724060936.952659-7-david@gibson.dropbear.id.au \
    --to=david@gibson.dropbear.id.au \
    --cc=passt-dev@passt.top \
    --cc=sbrivio@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link.

  Be sure your reply has a Subject: header at the top and a blank line
  before the message body.
Code repositories for project(s) associated with this public inbox

	https://passt.top/passt

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for IMAP folder(s).