public inbox for passt-dev@passt.top
 help / color / mirror / code / Atom feed
From: Stefano Brivio <sbrivio@redhat.com>
To: David Gibson <david@gibson.dropbear.id.au>
Cc: passt-dev@passt.top
Subject: Re: [PATCH 03/17] netlink: Split nl_route() into separate operation functions
Date: Thu, 3 Aug 2023 00:47:40 +0200	[thread overview]
Message-ID: <20230803004740.0f271388@elisabeth> (raw)
In-Reply-To: <20230724060936.952659-4-david@gibson.dropbear.id.au>

On Mon, 24 Jul 2023 16:09:22 +1000
David Gibson <david@gibson.dropbear.id.au> wrote:

> nl_route() can perform 3 quite different operations based on the 'op'
> parameter.  Split this into separate functions for each one.  This requires
> more lines of code, but makes the internal logic of each operation much
> easier to follow.
> 
> Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
> ---
>  conf.c    |   4 +-
>  netlink.c | 238 ++++++++++++++++++++++++++++++++++--------------------
>  netlink.h |  11 +--
>  pasta.c   |  16 ++--
>  4 files changed, 164 insertions(+), 105 deletions(-)
> 
> diff --git a/conf.c b/conf.c
> index 2057028..66958d4 100644
> --- a/conf.c
> +++ b/conf.c
> @@ -648,7 +648,7 @@ static unsigned int conf_ip4(unsigned int ifi,
>  	}
>  
>  	if (IN4_IS_ADDR_UNSPECIFIED(&ip4->gw))
> -		nl_route(NL_GET, ifi, 0, AF_INET, &ip4->gw);
> +		nl_route_get_def(ifi, AF_INET, &ip4->gw);
>  
>  	if (IN4_IS_ADDR_UNSPECIFIED(&ip4->addr))
>  		nl_addr_get(ifi, AF_INET, &ip4->addr, &ip4->prefix_len, NULL);
> @@ -699,7 +699,7 @@ static unsigned int conf_ip6(unsigned int ifi,
>  	}
>  
>  	if (IN6_IS_ADDR_UNSPECIFIED(&ip6->gw))
> -		nl_route(NL_GET, ifi, 0, AF_INET6, &ip6->gw);
> +		nl_route_get_def(ifi, AF_INET6, &ip6->gw);
>  
>  	nl_addr_get(ifi, AF_INET6,
>  		    IN6_IS_ADDR_UNSPECIFIED(&ip6->addr) ? &ip6->addr : NULL,
> diff --git a/netlink.c b/netlink.c
> index 269d738..346eb3a 100644
> --- a/netlink.c
> +++ b/netlink.c
> @@ -185,15 +185,71 @@ unsigned int nl_get_ext_if(sa_family_t af)
>  }
>  
>  /**
> - * nl_route() - Get/set/copy routes for given interface and address family
> - * @op:		Requested operation
> - * @ifi:	Interface index in outer network namespace
> - * @ifi_ns:	Interface index in target namespace for NL_SET, NL_DUP
> + * nl_route_get_def() - Get default route for given interface and address family
> + * @ifi:	Interface index
> + * @af:		Address family
> + * @gw:		Default gateway to fill
> + */
> +void nl_route_get_def(unsigned int ifi, sa_family_t af, void *gw)
> +{
> +	struct req_t {
> +		struct nlmsghdr nlh;
> +		struct rtmsg rtm;
> +		struct rtattr rta;
> +		unsigned int ifi;
> +	} req = {
> +		.nlh.nlmsg_type	  = RTM_GETROUTE,
> +		.nlh.nlmsg_len	  = sizeof(req),
> +		.nlh.nlmsg_flags  = NLM_F_REQUEST | NLM_F_DUMP,
> +		.nlh.nlmsg_seq	  = nl_seq++,
> +
> +		.rtm.rtm_family	  = af,
> +		.rtm.rtm_table	  = RT_TABLE_MAIN,
> +		.rtm.rtm_scope	  = RT_SCOPE_UNIVERSE,
> +		.rtm.rtm_type	  = RTN_UNICAST,
> +
> +		.rta.rta_type	  = RTA_OIF,
> +		.rta.rta_len	  = RTA_LENGTH(sizeof(unsigned int)),
> +		.ifi		  = ifi,
> +	};
> +	struct nlmsghdr *nh;
> +	char buf[NLBUFSIZ];
> +	ssize_t n;
> +
> +	if ((n = nl_req(0, buf, &req, req.nlh.nlmsg_len)) < 0)
> +		return;
> +
> +	for (nh = (struct nlmsghdr *)buf;
> +	     NLMSG_OK(nh, n) && nh->nlmsg_type != NLMSG_DONE;
> +	     nh = NLMSG_NEXT(nh, n)) {
> +		struct rtmsg *rtm = (struct rtmsg *)NLMSG_DATA(nh);
> +		struct rtattr *rta;
> +		size_t na;
> +
> +		if (nh->nlmsg_type != RTM_NEWROUTE)
> +			continue;
> +
> +		if (rtm->rtm_dst_len)
> +			continue;
> +
> +		for (rta = RTM_RTA(rtm), na = RTM_PAYLOAD(nh); RTA_OK(rta, na);
> +		     rta = RTA_NEXT(rta, na)) {
> +			if (rta->rta_type != RTA_GATEWAY)
> +				continue;
> +
> +			memcpy(gw, RTA_DATA(rta), RTA_PAYLOAD(rta));
> +			return;
> +		}
> +	}
> +}
> +
> +/**
> + * nl_route_set_def() - Set default route for given interface and address family
> + * @ifi:	Interface index in target namespace
>   * @af:		Address family
> - * @gw:		Default gateway to fill on NL_GET, to set on NL_SET
> + * @gw:		Default gateway to set
>   */
> -void nl_route(enum nl_op op, unsigned int ifi, unsigned int ifi_ns,
> -	      sa_family_t af, void *gw)
> +void nl_route_set_def(unsigned int ifi, sa_family_t af, void *gw)
>  {
>  	struct req_t {
>  		struct nlmsghdr nlh;
> @@ -215,122 +271,126 @@ void nl_route(enum nl_op op, unsigned int ifi, unsigned int ifi_ns,
>  			} r4;
>  		} set;
>  	} req = {
> -		.nlh.nlmsg_type	  = op == NL_SET ? RTM_NEWROUTE : RTM_GETROUTE,
> -		.nlh.nlmsg_flags  = NLM_F_REQUEST,
> +		.nlh.nlmsg_type	  = RTM_NEWROUTE,
> +		.nlh.nlmsg_flags  = NLM_F_REQUEST | NLM_F_ACK |
> +				    NLM_F_CREATE | NLM_F_EXCL,
>  		.nlh.nlmsg_seq	  = nl_seq++,
>  
>  		.rtm.rtm_family	  = af,
>  		.rtm.rtm_table	  = RT_TABLE_MAIN,
>  		.rtm.rtm_scope	  = RT_SCOPE_UNIVERSE,
>  		.rtm.rtm_type	  = RTN_UNICAST,
> +		.rtm.rtm_protocol = RTPROT_BOOT,
>  
>  		.rta.rta_type	  = RTA_OIF,
>  		.rta.rta_len	  = RTA_LENGTH(sizeof(unsigned int)),
> -		.ifi		  = op == NL_SET ? ifi_ns : ifi,
> +		.ifi		  = ifi,
>  	};
> -	unsigned dup_routes = 0;
> -	ssize_t n, nlmsgs_size;
> -	struct nlmsghdr *nh;
> -	struct rtattr *rta;
>  	char buf[NLBUFSIZ];
> -	struct rtmsg *rtm;
> -	size_t na;
> -
> -	if (op == NL_SET) {
> -		if (af == AF_INET6) {
> -			size_t rta_len = RTA_LENGTH(sizeof(req.set.r6.d));
>  
> -			req.nlh.nlmsg_len = offsetof(struct req_t, set.r6)
> -				+ sizeof(req.set.r6);
> +	if (af == AF_INET6) {
> +		size_t rta_len = RTA_LENGTH(sizeof(req.set.r6.d));
>  
> -			req.set.r6.rta_dst.rta_type = RTA_DST;
> -			req.set.r6.rta_dst.rta_len = rta_len;
> +		req.nlh.nlmsg_len = offsetof(struct req_t, set.r6)
> +			+ sizeof(req.set.r6);
>  
> -			memcpy(&req.set.r6.a, gw, sizeof(req.set.r6.a));
> -			req.set.r6.rta_gw.rta_type = RTA_GATEWAY;
> -			req.set.r6.rta_gw.rta_len = rta_len;
> -		} else {
> -			size_t rta_len = RTA_LENGTH(sizeof(req.set.r4.d));
> +		req.set.r6.rta_dst.rta_type = RTA_DST;
> +		req.set.r6.rta_dst.rta_len = rta_len;
>  
> -			req.nlh.nlmsg_len = offsetof(struct req_t, set.r4)
> -				+ sizeof(req.set.r4);
> +		memcpy(&req.set.r6.a, gw, sizeof(req.set.r6.a));
> +		req.set.r6.rta_gw.rta_type = RTA_GATEWAY;
> +		req.set.r6.rta_gw.rta_len = rta_len;
> +	} else {
> +		size_t rta_len = RTA_LENGTH(sizeof(req.set.r4.d));
>  
> -			req.set.r4.rta_dst.rta_type = RTA_DST;
> -			req.set.r4.rta_dst.rta_len = rta_len;
> +		req.nlh.nlmsg_len = offsetof(struct req_t, set.r4)
> +			+ sizeof(req.set.r4);
>  
> -			req.set.r4.a = *(uint32_t *)gw;
> -			req.set.r4.rta_gw.rta_type = RTA_GATEWAY;
> -			req.set.r4.rta_gw.rta_len = rta_len;
> -		}
> +		req.set.r4.rta_dst.rta_type = RTA_DST;
> +		req.set.r4.rta_dst.rta_len = rta_len;
>  
> -		req.rtm.rtm_protocol = RTPROT_BOOT;
> -		req.nlh.nlmsg_flags |= NLM_F_ACK | NLM_F_EXCL | NLM_F_CREATE;
> -	} else {
> -		req.nlh.nlmsg_len = offsetof(struct req_t, set.r6);
> -		req.nlh.nlmsg_flags |= NLM_F_DUMP;
> +		req.set.r4.a = *(uint32_t *)gw;
> +		req.set.r4.rta_gw.rta_type = RTA_GATEWAY;
> +		req.set.r4.rta_gw.rta_len = rta_len;
>  	}
>  
> -	if ((n = nl_req(op == NL_SET, buf, &req, req.nlh.nlmsg_len)) < 0)
> -		return;
> +	nl_req(1, buf, &req, req.nlh.nlmsg_len);
> +}
>  
> -	if (op == NL_SET)
> +/**
> + * nl_route_dup() - Copy routes for given interface and address family
> + * @ifi:	Interface index in outer network namespace
> + * @ifi_ns:	Interface index in target namespace
> + * @af:		Address family
> + */
> +void nl_route_dup(unsigned int ifi, unsigned int ifi_ns, sa_family_t af)
> +{
> +	struct req_t {
> +		struct nlmsghdr nlh;
> +		struct rtmsg rtm;
> +		struct rtattr rta;
> +		unsigned int ifi;
> +	} req = {
> +		.nlh.nlmsg_type	  = RTM_GETROUTE,
> +		.nlh.nlmsg_len	  = sizeof(req),
> +		.nlh.nlmsg_flags  = NLM_F_REQUEST | NLM_F_DUMP,
> +		.nlh.nlmsg_seq	  = nl_seq++,
> +
> +		.rtm.rtm_family	  = af,
> +		.rtm.rtm_table	  = RT_TABLE_MAIN,
> +		.rtm.rtm_scope	  = RT_SCOPE_UNIVERSE,
> +		.rtm.rtm_type	  = RTN_UNICAST,
> +
> +		.rta.rta_type	  = RTA_OIF,
> +		.rta.rta_len	  = RTA_LENGTH(sizeof(unsigned int)),
> +		.ifi		  = ifi,
> +	};
> +	char buf[NLBUFSIZ], resp[NLBUFSIZ];
> +	unsigned dup_routes = 0;
> +	ssize_t n, nlmsgs_size;
> +	struct nlmsghdr *nh;
> +	unsigned i;
> +
> +	if ((n = nl_req(0, buf, &req, req.nlh.nlmsg_len)) < 0)
>  		return;
>  
> -	nh = (struct nlmsghdr *)buf;
>  	nlmsgs_size = n;
>  
> -	for ( ; NLMSG_OK(nh, n); nh = NLMSG_NEXT(nh, n)) {
> -		if (nh->nlmsg_type != RTM_NEWROUTE)
> -			goto next;
> -
> -		if (op == NL_DUP) {
> -			nh->nlmsg_seq = nl_seq++;
> -			nh->nlmsg_pid = 0;
> -			nh->nlmsg_flags &= ~NLM_F_DUMP_FILTERED;
> -			nh->nlmsg_flags |= NLM_F_REQUEST | NLM_F_ACK |
> -					   NLM_F_CREATE;
> -			dup_routes++;
> -		}
> +	for (nh = (struct nlmsghdr *)buf;
> +	     NLMSG_OK(nh, n) && nh->nlmsg_type != NLMSG_DONE;
> +	     nh = NLMSG_NEXT(nh, n)) {
> +		struct rtmsg *rtm = (struct rtmsg *)NLMSG_DATA(nh);
> +		struct rtattr *rta;
> +		size_t na;
>  
> -		rtm = (struct rtmsg *)NLMSG_DATA(nh);
> -		if (op == NL_GET && rtm->rtm_dst_len)
> +		if (nh->nlmsg_type != RTM_NEWROUTE)
>  			continue;
>  
> +		nh->nlmsg_seq = nl_seq++;
> +		nh->nlmsg_pid = 0;
> +		nh->nlmsg_flags &= ~NLM_F_DUMP_FILTERED;
> +		nh->nlmsg_flags |= NLM_F_REQUEST | NLM_F_ACK |
> +			NLM_F_CREATE;
> +		dup_routes++;
> +
>  		for (rta = RTM_RTA(rtm), na = RTM_PAYLOAD(nh); RTA_OK(rta, na);
>  		     rta = RTA_NEXT(rta, na)) {
> -			if (op == NL_GET) {
> -				if (rta->rta_type != RTA_GATEWAY)
> -					continue;
> -
> -				memcpy(gw, RTA_DATA(rta), RTA_PAYLOAD(rta));
> -				return;
> -			}
> -
> -			if (op == NL_DUP && rta->rta_type == RTA_OIF)
> +			if (rta->rta_type == RTA_OIF)
>  				*(unsigned int *)RTA_DATA(rta) = ifi_ns;
>  		}
> -
> -next:
> -		if (nh->nlmsg_type == NLMSG_DONE)
> -			break;
>  	}
>  
> -	if (op == NL_DUP) {
> -		char resp[NLBUFSIZ];
> -		unsigned i;
> -
> -		nh = (struct nlmsghdr *)buf;
> -		/* Routes might have dependencies between each other, and the
> -		 * kernel processes RTM_NEWROUTE messages sequentially. For n
> -		 * valid routes, we might need to send up to n requests to get
> -		 * all of them inserted. Routes that have been already inserted
> -		 * won't cause the whole request to fail, so we can simply
> -		 * repeat the whole request. This approach avoids the need to
> -		 * calculate dependencies: let the kernel do that.
> -		 */
> -		for (i = 0; i < dup_routes; i++)
> -			nl_req(1, resp, nh, nlmsgs_size);
> -	}
> +	nh = (struct nlmsghdr *)buf;
> +	/* Routes might have dependencies between each other, and the
> +	 * kernel processes RTM_NEWROUTE messages sequentially. For n
> +	 * valid routes, we might need to send up to n requests to get
> +	 * all of them inserted. Routes that have been already
> +	 * inserted won't cause the whole request to fail, so we can
> +	 * simply repeat the whole request. This approach avoids the
> +	 * need to calculate dependencies: let the kernel do that.
> +	 */

Or, reflowed to use the full line width now that the comment sits one
indentation level shallower:

	/* Routes might have dependencies between each other, and the kernel
	 * processes RTM_NEWROUTE messages sequentially. For n valid routes, we
	 * might need to send up to n requests to get all of them inserted.
	 * Routes that have been already inserted won't cause the whole request
	 * to fail, so we can simply repeat the whole request. This approach
	 * avoids the need to calculate dependencies: let the kernel do that.
	 */

(can also be "fixed" in 6/17).

-- 
Stefano


  reply	other threads:[~2023-08-02 22:47 UTC|newest]

Thread overview: 35+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-07-24  6:09 [PATCH 00/17] netlink fixes and cleanups David Gibson
2023-07-24  6:09 ` [PATCH 01/17] netlink: Split up functionality if nl_link() David Gibson
2023-08-02 22:47   ` Stefano Brivio
2023-08-03  2:09     ` David Gibson
2023-08-03  4:29       ` David Gibson
2023-08-03  5:39         ` David Gibson
2023-08-03  5:40         ` Stefano Brivio
2023-07-24  6:09 ` [PATCH 02/17] netlink: Split nl_addr() into separate operation functions David Gibson
2023-08-02 22:47   ` Stefano Brivio
2023-08-03  2:11     ` David Gibson
2023-07-24  6:09 ` [PATCH 03/17] netlink: Split nl_route() " David Gibson
2023-08-02 22:47   ` Stefano Brivio [this message]
2023-08-03  2:18     ` David Gibson
2023-07-24  6:09 ` [PATCH 04/17] netlink: Use struct in_addr for IPv4 addresses, not bare uint32_t David Gibson
2023-07-24  6:09 ` [PATCH 05/17] netlink: Explicitly pass netlink sockets to operations David Gibson
2023-07-24  6:09 ` [PATCH 06/17] netlink: Make nl_*_dup() use a separate datagram for each request David Gibson
2023-07-24  6:09 ` [PATCH 07/17] netlink: Start sequence number from 1 instead of 0 David Gibson
2023-07-24  6:09 ` [PATCH 08/17] netlink: Treat send() or recv() errors as fatal David Gibson
2023-08-02 22:47   ` Stefano Brivio
2023-08-03  2:19     ` David Gibson
2023-07-24  6:09 ` [PATCH 09/17] netlink: Fill in netlink header fields from nl_req() David Gibson
2023-07-24  6:09 ` [PATCH 10/17] netlink: Add nl_do() helper for simple operations with error checking David Gibson
2023-08-02 22:48   ` Stefano Brivio
2023-08-03  2:24     ` David Gibson
2023-07-24  6:09 ` [PATCH 11/17] netlink: Clearer reasoning about the netlink response buffer size David Gibson
2023-08-02 22:48   ` Stefano Brivio
2023-08-03  2:22     ` David Gibson
2023-07-24  6:09 ` [PATCH 12/17] netlink: Split nl_req() to allow processing multiple response datagrams David Gibson
2023-07-24  6:09 ` [PATCH 13/17] netlink: Add nl_foreach_oftype to filter response message types David Gibson
2023-07-24  6:09 ` [PATCH 14/17] netlink: Propagate errors for "set" operations David Gibson
2023-07-24  6:09 ` [PATCH 15/17] netlink: Always process all responses to a netlink request David Gibson
2023-07-24  6:09 ` [PATCH 16/17] netlink: Propagate errors for "dump" operations David Gibson
2023-07-24  6:09 ` [PATCH 17/17] netlink: Propagate errors for "dup" operations David Gibson
2023-08-02 22:48   ` Stefano Brivio
2023-08-03  2:26     ` David Gibson

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20230803004740.0f271388@elisabeth \
    --to=sbrivio@redhat.com \
    --cc=david@gibson.dropbear.id.au \
    --cc=passt-dev@passt.top \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link.

  Be sure your reply has a Subject: header at the top and a blank line
  before the message body.

Code repositories for project(s) associated with this public inbox

	https://passt.top/passt

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for IMAP folder(s).