public inbox for passt-dev@passt.top
 help / color / mirror / code / Atom feed
From: Stefano Brivio <sbrivio@redhat.com>
To: passt-dev@passt.top
Subject: [PATCH 08/24] udp: Split buffer queueing/writing parts of udp_sock_handler()
Date: Fri, 25 Mar 2022 23:52:44 +0100	[thread overview]
Message-ID: <20220325225300.2803584-9-sbrivio@redhat.com> (raw)
In-Reply-To: <20220325225300.2803584-1-sbrivio@redhat.com>

[-- Attachment #1: Type: text/plain, Size: 14624 bytes --]

...it became too hard to follow: split it off to
udp_sock_fill_data_v{4,6}.

While at it, use IN6_ARE_ADDR_EQUAL(a, b), courtesy of netinet/in.h,
instead of open-coded memcmp().

Signed-off-by: Stefano Brivio <sbrivio(a)redhat.com>
---
 udp.c | 364 +++++++++++++++++++++++++++++++---------------------------
 1 file changed, 193 insertions(+), 171 deletions(-)

diff --git a/udp.c b/udp.c
index ce536a6..ebbcda1 100644
--- a/udp.c
+++ b/udp.c
@@ -655,6 +655,177 @@ static void udp_sock_handler_splice(struct ctx *c, union epoll_ref ref,
 	sendmmsg(s, udp_mmh_sendto, n, MSG_NOSIGNAL);
 }
 
+/**
+ * udp_sock_fill_data_v4() - Fill and queue one buffer. In pasta mode, write it
+ * @c:		Execution context
+ * @n:		Index of buffer in udp4_l2_buf pool
+ * @ref:	epoll reference from socket
+ * @msg_idx:	Index of message being prepared (spans multiple buffers)
+ * @msg_bufs:	Number of buffers queued so far in message being prepared
+ * @msg_len:	Length of current message being prepared for sending
+ * @now:	Current timestamp
+ */
+static void udp_sock_fill_data_v4(struct ctx *c, int n, union epoll_ref ref,
+				  int *msg_idx, int *msg_bufs, ssize_t *msg_len,
+				  struct timespec *now)
+{
+	struct msghdr *mh = &udp4_l2_mh_tap[*msg_idx].msg_hdr;
+	struct udp4_l2_buf_t *b = &udp4_l2_buf[n];
+	size_t ip_len, buf_len;
+	in_port_t src_port;
+	in_addr_t src;
+
+	ip_len = udp4_l2_mh_sock[n].msg_len + sizeof(b->iph) + sizeof(b->uh);
+	b->iph.tot_len = htons(ip_len);
+
+	src = ntohl(b->s_in.sin_addr.s_addr);
+	src_port = ntohs(b->s_in.sin_port);
+
+	if (src >> IN_CLASSA_NSHIFT == IN_LOOPBACKNET ||
+	    src == INADDR_ANY || src == ntohl(c->addr4_seen)) {
+		b->iph.saddr = c->gw4;
+		udp_tap_map[V4][src_port].ts_local = now->tv_sec;
+
+		if (b->s_in.sin_addr.s_addr == c->addr4_seen)
+			udp_tap_map[V4][src_port].loopback = 0;
+		else
+			udp_tap_map[V4][src_port].loopback = 1;
+
+		bitmap_set(udp_act[V4][UDP_ACT_TAP], src_port);
+	} else if (c->dns4_fwd &&
+		   src == ntohl(c->dns4[0]) && src_port == 53) {
+		b->iph.saddr = c->dns4_fwd;
+	} else {
+		b->iph.saddr = b->s_in.sin_addr.s_addr;
+	}
+
+	udp_update_check4(b);
+	b->uh.source = b->s_in.sin_port;
+	b->uh.dest = htons(ref.r.p.udp.udp.port);
+	b->uh.len = htons(udp4_l2_mh_sock[n].msg_len + sizeof(b->uh));
+
+	/* pasta: write the frame to the tap right away, nothing is queued */
+	if (c->mode == MODE_PASTA) {
+		if (write(c->fd_tap, &b->eh, sizeof(b->eh) + ip_len) < 0)
+			debug("tap write: %s", strerror(errno));
+		pcap((char *)&b->eh, sizeof(b->eh) + ip_len);
+		return;
+	}
+
+	b->vnet_len = htonl(ip_len + sizeof(struct ethhdr));
+	buf_len = sizeof(uint32_t) + sizeof(struct ethhdr) + ip_len;
+	udp4_l2_iov_tap[n].iov_len = buf_len;
+
+	/* With bigger messages, qemu closes the connection. */
+	if (*msg_bufs && *msg_len + buf_len > SHRT_MAX) {
+		mh->msg_iovlen = *msg_bufs;
+
+		(*msg_idx)++;
+		udp4_l2_mh_tap[*msg_idx].msg_hdr.msg_iov = &udp4_l2_iov_tap[n];
+		*msg_len = *msg_bufs = 0;
+	}
+
+	*msg_len += buf_len;
+	(*msg_bufs)++;
+}
+
+/**
+ * udp_sock_fill_data_v6() - Fill and queue one buffer. In pasta mode, write it
+ * @c:		Execution context
+ * @n:		Index of buffer in udp6_l2_buf pool
+ * @ref:	epoll reference from socket
+ * @msg_idx:	Index of message being prepared (spans multiple buffers)
+ * @msg_bufs:	Number of buffers queued so far in message being prepared
+ * @msg_len:	Length of current message being prepared for sending
+ * @now:	Current timestamp
+ */
+static void udp_sock_fill_data_v6(struct ctx *c, int n, union epoll_ref ref,
+				  int *msg_idx, int *msg_bufs, ssize_t *msg_len,
+				  struct timespec *now)
+{
+	struct msghdr *mh = &udp6_l2_mh_tap[*msg_idx].msg_hdr;
+	struct udp6_l2_buf_t *b = &udp6_l2_buf[n];
+	size_t ip_len, buf_len;
+	struct in6_addr *src;
+	in_port_t src_port;
+
+	src = &b->s_in6.sin6_addr;
+	src_port = ntohs(b->s_in6.sin6_port);
+
+	ip_len = udp6_l2_mh_sock[n].msg_len + sizeof(b->ip6h) + sizeof(b->uh);
+	b->ip6h.payload_len = htons(udp6_l2_mh_sock[n].msg_len + sizeof(b->uh));
+
+	if (IN6_IS_ADDR_LINKLOCAL(src)) {
+		b->ip6h.daddr = c->addr6_ll_seen;
+		b->ip6h.saddr = b->s_in6.sin6_addr;
+	} else if (IN6_IS_ADDR_LOOPBACK(src)			||
+		   IN6_ARE_ADDR_EQUAL(src, &c->addr6_seen)	||
+		   IN6_ARE_ADDR_EQUAL(src, &c->addr6)) {
+		b->ip6h.daddr = c->addr6_ll_seen;
+
+		if (IN6_IS_ADDR_LINKLOCAL(&c->gw6))
+			b->ip6h.saddr = c->gw6;
+		else
+			b->ip6h.saddr = c->addr6_ll;
+
+		udp_tap_map[V6][src_port].ts_local = now->tv_sec;
+
+		if (IN6_IS_ADDR_LOOPBACK(src))
+			udp_tap_map[V6][src_port].loopback = 1;
+		else
+			udp_tap_map[V6][src_port].loopback = 0;
+
+		if (IN6_ARE_ADDR_EQUAL(src, &c->addr6))
+			udp_tap_map[V6][src_port].gua = 1;
+		else
+			udp_tap_map[V6][src_port].gua = 0;
+
+		bitmap_set(udp_act[V6][UDP_ACT_TAP], src_port);
+	} else if (!IN6_IS_ADDR_UNSPECIFIED(&c->dns6_fwd) &&
+		   IN6_ARE_ADDR_EQUAL(src, &c->dns6[0]) && src_port == 53) {
+		b->ip6h.daddr = c->addr6_seen;
+		b->ip6h.saddr = c->dns6_fwd;
+	} else {
+		b->ip6h.daddr = c->addr6_seen;
+		b->ip6h.saddr = b->s_in6.sin6_addr;
+	}
+
+	b->uh.source = b->s_in6.sin6_port;
+	b->uh.dest = htons(ref.r.p.udp.udp.port);
+	b->uh.len = b->ip6h.payload_len;
+
+	/* Header fields are altered for the checksum only, restored below */
+	b->ip6h.hop_limit = IPPROTO_UDP;
+	b->ip6h.version = b->ip6h.nexthdr = b->uh.check = 0;
+	b->uh.check = csum(&b->ip6h, ip_len, 0);
+	b->ip6h.version = 6;
+	b->ip6h.nexthdr = IPPROTO_UDP;
+	b->ip6h.hop_limit = 255;
+
+	if (c->mode == MODE_PASTA) {
+		if (write(c->fd_tap, &b->eh, sizeof(b->eh) + ip_len) < 0)
+			debug("tap write: %s", strerror(errno));
+		pcap((char *)&b->eh, sizeof(b->eh) + ip_len);
+		return;
+	}
+
+	b->vnet_len = htonl(ip_len + sizeof(struct ethhdr));
+	buf_len = sizeof(uint32_t) + sizeof(struct ethhdr) + ip_len;
+	udp6_l2_iov_tap[n].iov_len = buf_len;
+
+	/* With bigger messages, qemu closes the connection. */
+	if (*msg_bufs && *msg_len + buf_len > SHRT_MAX) {
+		mh->msg_iovlen = *msg_bufs;
+
+		(*msg_idx)++;
+		udp6_l2_mh_tap[*msg_idx].msg_hdr.msg_iov = &udp6_l2_iov_tap[n];
+		*msg_len = *msg_bufs = 0;
+	}
+
+	*msg_len += buf_len;
+	(*msg_bufs)++;
+}
+
 /**
  * udp_sock_handler() - Handle new data from socket
  * @c:		Execution context
@@ -668,10 +839,10 @@ static void udp_sock_handler_splice(struct ctx *c, union epoll_ref ref,
 void udp_sock_handler(struct ctx *c, union epoll_ref ref, uint32_t events,
 		      struct timespec *now)
 {
-	int iov_in_msg, msg_i = 0, ret;
-	ssize_t n, msglen, missing = 0;
+	ssize_t n, msg_len = 0, missing = 0;
+	int msg_bufs = 0, msg_i = 0, ret;
 	struct mmsghdr *tap_mmh;
-	struct msghdr *cur_mh;
+	struct msghdr *last_mh;
 	unsigned int i;
 
 	if (events == EPOLLERR)
@@ -687,183 +858,34 @@ void udp_sock_handler(struct ctx *c, union epoll_ref ref, uint32_t events,
 		if (n <= 0)
 			return;
 
-		cur_mh = &udp6_l2_mh_tap[msg_i].msg_hdr;
-		cur_mh->msg_iov = &udp6_l2_iov_tap[0];
-		msg_i = msglen = iov_in_msg = 0;
+		udp6_l2_mh_tap[0].msg_hdr.msg_iov = &udp6_l2_iov_tap[0];
 
 		for (i = 0; i < (unsigned)n; i++) {
-			struct udp6_l2_buf_t *b = &udp6_l2_buf[i];
-			size_t ip_len, iov_len;
-
-			ip_len = udp6_l2_mh_sock[i].msg_len +
-				 sizeof(b->ip6h) + sizeof(b->uh);
-
-			b->ip6h.payload_len = htons(udp6_l2_mh_sock[i].msg_len +
-						    sizeof(b->uh));
-
-			if (IN6_IS_ADDR_LINKLOCAL(&b->s_in6.sin6_addr)) {
-				b->ip6h.daddr = c->addr6_ll_seen;
-				b->ip6h.saddr = b->s_in6.sin6_addr;
-			} else if (IN6_IS_ADDR_LOOPBACK(&b->s_in6.sin6_addr) ||
-				   !memcmp(&b->s_in6.sin6_addr, &c->addr6_seen,
-					   sizeof(c->addr6)) ||
-				   !memcmp(&b->s_in6.sin6_addr, &c->addr6,
-					   sizeof(c->addr6))) {
-				in_port_t src = htons(b->s_in6.sin6_port);
-
-				b->ip6h.daddr = c->addr6_ll_seen;
-
-				if (IN6_IS_ADDR_LINKLOCAL(&c->gw6))
-					b->ip6h.saddr = c->gw6;
-				else
-					b->ip6h.saddr = c->addr6_ll;
-
-				udp_tap_map[V6][src].ts_local = now->tv_sec;
-
-				if (IN6_IS_ADDR_LOOPBACK(&b->s_in6.sin6_addr))
-					udp_tap_map[V6][src].loopback = 1;
-				else
-					udp_tap_map[V6][src].loopback = 0;
-
-				if (!memcmp(&b->s_in6.sin6_addr, &c->addr6,
-						 sizeof(c->addr6)))
-					udp_tap_map[V6][src].gua = 1;
-				else
-					udp_tap_map[V6][src].gua = 0;
-
-				bitmap_set(udp_act[V6][UDP_ACT_TAP], src);
-			} else if (!IN6_IS_ADDR_UNSPECIFIED(&c->dns6_fwd) &&
-				   !memcmp(&b->s_in6.sin6_addr, &c->dns6_fwd,
-					   sizeof(c->dns6_fwd)) &&
-				   ntohs(b->s_in6.sin6_port) == 53) {
-				b->ip6h.daddr = c->addr6_seen;
-				b->ip6h.saddr = c->dns6_fwd;
-			} else {
-				b->ip6h.daddr = c->addr6_seen;
-				b->ip6h.saddr = b->s_in6.sin6_addr;
-			}
-
-			b->uh.source = b->s_in6.sin6_port;
-			b->uh.dest = htons(ref.r.p.udp.udp.port);
-			b->uh.len = b->ip6h.payload_len;
-
-			b->ip6h.hop_limit = IPPROTO_UDP;
-			b->ip6h.version = 0;
-			b->ip6h.nexthdr = 0;
-			b->uh.check = 0;
-			b->uh.check = csum(&b->ip6h, ip_len, 0);
-			b->ip6h.version = 6;
-			b->ip6h.nexthdr = IPPROTO_UDP;
-			b->ip6h.hop_limit = 255;
-
-			if (c->mode == MODE_PASTA) {
-				ip_len += sizeof(struct ethhdr);
-				if (write(c->fd_tap, &b->eh, ip_len) < 0)
-					debug("tap write: %s", strerror(errno));
-				pcap((char *)&b->eh, ip_len);
-				continue;
-			}
-
-			b->vnet_len = htonl(ip_len + sizeof(struct ethhdr));
-			iov_len = sizeof(uint32_t) + sizeof(struct ethhdr) +
-				  ip_len;
-			udp6_l2_iov_tap[i].iov_len = iov_len;
-
-			/* With bigger messages, qemu closes the connection. */
-			if (iov_in_msg && msglen + iov_len > SHRT_MAX) {
-				cur_mh->msg_iovlen = iov_in_msg;
-
-				cur_mh = &udp6_l2_mh_tap[++msg_i].msg_hdr;
-				msglen = iov_in_msg = 0;
-				cur_mh->msg_iov = &udp6_l2_iov_tap[i];
-			}
-
-			msglen += iov_len;
-			iov_in_msg++;
+			udp_sock_fill_data_v6(c, i, ref,
+					      &msg_i, &msg_bufs, &msg_len, now);
 		}
 
+		udp6_l2_mh_tap[msg_i].msg_hdr.msg_iovlen = msg_bufs;
 		tap_mmh = udp6_l2_mh_tap;
 	} else {
 		n = recvmmsg(ref.r.s, udp4_l2_mh_sock, UDP_TAP_FRAMES, 0, NULL);
 		if (n <= 0)
 			return;
 
-		cur_mh = &udp4_l2_mh_tap[msg_i].msg_hdr;
-		cur_mh->msg_iov = &udp4_l2_iov_tap[0];
-		msg_i = msglen = iov_in_msg = 0;
+		udp4_l2_mh_tap[0].msg_hdr.msg_iov = &udp4_l2_iov_tap[0];
 
 		for (i = 0; i < (unsigned)n; i++) {
-			struct udp4_l2_buf_t *b = &udp4_l2_buf[i];
-			size_t ip_len, iov_len;
-			in_addr_t s_addr;
-
-			ip_len = udp4_l2_mh_sock[i].msg_len +
-				 sizeof(b->iph) + sizeof(b->uh);
-
-			b->iph.tot_len = htons(ip_len);
-
-			s_addr = ntohl(b->s_in.sin_addr.s_addr);
-			if (s_addr >> IN_CLASSA_NSHIFT == IN_LOOPBACKNET ||
-			    s_addr == INADDR_ANY ||
-			    s_addr == ntohl(c->addr4_seen)) {
-				in_port_t src = htons(b->s_in.sin_port);
-
-				b->iph.saddr = c->gw4;
-				udp_tap_map[V4][src].ts_local = now->tv_sec;
-
-				if (b->s_in.sin_addr.s_addr == c->addr4_seen)
-					udp_tap_map[V4][src].loopback = 0;
-				else
-					udp_tap_map[V4][src].loopback = 1;
-
-				bitmap_set(udp_act[V4][UDP_ACT_TAP], src);
-			} else if (c->dns4_fwd &&
-				   s_addr == ntohl(c->dns4[0]) &&
-				   ntohs(b->s_in.sin_port) == 53) {
-				b->iph.saddr = c->dns4_fwd;
-			} else {
-				b->iph.saddr = b->s_in.sin_addr.s_addr;
-			}
-
-			udp_update_check4(b);
-			b->uh.source = b->s_in.sin_port;
-			b->uh.dest = htons(ref.r.p.udp.udp.port);
-			b->uh.len = ntohs(udp4_l2_mh_sock[i].msg_len +
-					  sizeof(b->uh));
-
-			if (c->mode == MODE_PASTA) {
-				ip_len += sizeof(struct ethhdr);
-				if (write(c->fd_tap, &b->eh, ip_len) < 0)
-					debug("tap write: %s", strerror(errno));
-				pcap((char *)&b->eh, ip_len);
-				continue;
-			}
-
-			b->vnet_len = htonl(ip_len + sizeof(struct ethhdr));
-			iov_len = sizeof(uint32_t) + sizeof(struct ethhdr) +
-				  ip_len;
-			udp4_l2_iov_tap[i].iov_len = iov_len;
-
-			/* With bigger messages, qemu closes the connection. */
-			if (iov_in_msg && msglen + iov_len > SHRT_MAX) {
-				cur_mh->msg_iovlen = iov_in_msg;
-
-				cur_mh = &udp4_l2_mh_tap[++msg_i].msg_hdr;
-				msglen = iov_in_msg = 0;
-				cur_mh->msg_iov = &udp4_l2_iov_tap[i];
-			}
-
-			msglen += iov_len;
-			iov_in_msg++;
+			udp_sock_fill_data_v4(c, i, ref,
+					      &msg_i, &msg_bufs, &msg_len, now);
 		}
 
+		udp4_l2_mh_tap[msg_i].msg_hdr.msg_iovlen = msg_bufs;
 		tap_mmh = udp4_l2_mh_tap;
 	}
 
 	if (c->mode == MODE_PASTA)
 		return;
 
-	cur_mh->msg_iovlen = iov_in_msg;
 	ret = sendmmsg(c->fd_tap, tap_mmh, msg_i + 1,
 		       MSG_NOSIGNAL | MSG_DONTWAIT);
 	if (ret <= 0)
@@ -887,25 +909,25 @@ void udp_sock_handler(struct ctx *c, union epoll_ref ref, uint32_t events,
 	 *
 	 * re-send everything from here:		   ^--  -----   ------
 	 */
-	cur_mh = &tap_mmh[ret - 1].msg_hdr;
-	for (i = 0, msglen = 0; i < cur_mh->msg_iovlen; i++) {
+	last_mh = &tap_mmh[ret - 1].msg_hdr;
+	for (i = 0, msg_len = 0; i < last_mh->msg_iovlen; i++) {
 		if (missing <= 0) {
-			msglen += cur_mh->msg_iov[i].iov_len;
-			missing = msglen - tap_mmh[ret - 1].msg_len;
+			msg_len += last_mh->msg_iov[i].iov_len;
+			missing = msg_len - tap_mmh[ret - 1].msg_len;
 		}
 
 		if (missing > 0) {
 			uint8_t **iov_base;
 			int first_offset;
 
-			iov_base = (uint8_t **)&cur_mh->msg_iov[i].iov_base;
-			first_offset = cur_mh->msg_iov[i].iov_len - missing;
+			iov_base = (uint8_t **)&last_mh->msg_iov[i].iov_base;
+			first_offset = last_mh->msg_iov[i].iov_len - missing;
 			*iov_base += first_offset;
-			cur_mh->msg_iov[i].iov_len = missing;
+			last_mh->msg_iov[i].iov_len = missing;
 
-			cur_mh->msg_iov = &cur_mh->msg_iov[i];
+			last_mh->msg_iov = &last_mh->msg_iov[i];
 
-			sendmsg(c->fd_tap, cur_mh, MSG_NOSIGNAL);
+			sendmsg(c->fd_tap, last_mh, MSG_NOSIGNAL);
 
 			*iov_base -= first_offset;
 			break;
@@ -997,7 +1019,7 @@ int udp_tap_handler(struct ctx *c, int af, void *addr,
 		sa = (struct sockaddr *)&s_in6;
 		sl = sizeof(s_in6);
 
-		if (!memcmp(addr, &c->gw6, sizeof(c->gw6)) && !c->no_map_gw) {
+		if (IN6_ARE_ADDR_EQUAL(addr, &c->gw6) && !c->no_map_gw) {
 			if (!udp_tap_map[V6][dst].ts_local ||
 			    udp_tap_map[V6][dst].loopback)
 				s_in6.sin6_addr = in6addr_loopback;
@@ -1005,7 +1027,7 @@ int udp_tap_handler(struct ctx *c, int af, void *addr,
 				s_in6.sin6_addr = c->addr6;
 			else
 				s_in6.sin6_addr = c->addr6_seen;
-		} else if (!memcmp(addr, &c->dns6_fwd, sizeof(c->dns6_fwd)) &&
+		} else if (IN6_ARE_ADDR_EQUAL(addr, &c->dns6_fwd) &&
 			   ntohs(s_in6.sin6_port) == 53) {
 			s_in6.sin6_addr = c->dns6[0];
 		} else if (IN6_IS_ADDR_LINKLOCAL(&s_in6.sin6_addr)) {
-- 
@@ -655,6 +655,177 @@ static void udp_sock_handler_splice(struct ctx *c, union epoll_ref ref,
 	sendmmsg(s, udp_mmh_sendto, n, MSG_NOSIGNAL);
 }
 
+/**
+ * udp_sock_fill_data_v4() - Fill and queue one buffer. In pasta mode, write it
+ * @c:		Execution context
+ * @n:		Index of buffer in udp4_l2_buf pool
+ * @ref:	epoll reference from socket
+ * @msg_idx:	Index of message being prepared (spans multiple buffers)
+ * @msg_bufs:	Number of buffers queued so far in message being prepared
+ * @msg_len:	Length of current message being prepared for sending
+ * @now:	Current timestamp
+ */
+static void udp_sock_fill_data_v4(struct ctx *c, int n, union epoll_ref ref,
+				  int *msg_idx, int *msg_bufs, ssize_t *msg_len,
+				  struct timespec *now)
+{
+	struct msghdr *mh = &udp4_l2_mh_tap[*msg_idx].msg_hdr;
+	struct udp4_l2_buf_t *b = &udp4_l2_buf[n];
+	size_t ip_len, buf_len;
+	in_port_t src_port;
+	in_addr_t src;
+
+	ip_len = udp4_l2_mh_sock[n].msg_len + sizeof(b->iph) + sizeof(b->uh);
+	b->iph.tot_len = htons(ip_len);
+
+	src = ntohl(b->s_in.sin_addr.s_addr);
+	src_port = ntohs(b->s_in.sin_port);
+
+	if (src >> IN_CLASSA_NSHIFT == IN_LOOPBACKNET ||
+	    src == INADDR_ANY || src == ntohl(c->addr4_seen)) {
+		b->iph.saddr = c->gw4;
+		udp_tap_map[V4][src_port].ts_local = now->tv_sec;
+
+		if (b->s_in.sin_addr.s_addr == c->addr4_seen)
+			udp_tap_map[V4][src_port].loopback = 0;
+		else
+			udp_tap_map[V4][src_port].loopback = 1;
+
+		bitmap_set(udp_act[V4][UDP_ACT_TAP], src_port);
+	} else if (c->dns4_fwd &&
+		   src == ntohl(c->dns4[0]) && src_port == 53) {
+		b->iph.saddr = c->dns4_fwd;
+	} else {
+		b->iph.saddr = b->s_in.sin_addr.s_addr;
+	}
+
+	udp_update_check4(b);
+	b->uh.source = b->s_in.sin_port;
+	b->uh.dest = htons(ref.r.p.udp.udp.port);
+	b->uh.len = htons(udp4_l2_mh_sock[n].msg_len + sizeof(b->uh));
+
+	/* pasta: write the frame to the tap right away, nothing is queued */
+	if (c->mode == MODE_PASTA) {
+		if (write(c->fd_tap, &b->eh, sizeof(b->eh) + ip_len) < 0)
+			debug("tap write: %s", strerror(errno));
+		pcap((char *)&b->eh, sizeof(b->eh) + ip_len);
+		return;
+	}
+
+	b->vnet_len = htonl(ip_len + sizeof(struct ethhdr));
+	buf_len = sizeof(uint32_t) + sizeof(struct ethhdr) + ip_len;
+	udp4_l2_iov_tap[n].iov_len = buf_len;
+
+	/* With bigger messages, qemu closes the connection. */
+	if (*msg_bufs && *msg_len + buf_len > SHRT_MAX) {
+		mh->msg_iovlen = *msg_bufs;
+
+		(*msg_idx)++;
+		udp4_l2_mh_tap[*msg_idx].msg_hdr.msg_iov = &udp4_l2_iov_tap[n];
+		*msg_len = *msg_bufs = 0;
+	}
+
+	*msg_len += buf_len;
+	(*msg_bufs)++;
+}
+
+/**
+ * udp_sock_fill_data_v6() - Fill and queue one buffer. In pasta mode, write it
+ * @c:		Execution context
+ * @n:		Index of buffer in udp6_l2_buf pool
+ * @ref:	epoll reference from socket
+ * @msg_idx:	Index of message being prepared (spans multiple buffers)
+ * @msg_bufs:	Number of buffers queued so far in message being prepared
+ * @msg_len:	Length of current message being prepared for sending
+ * @now:	Current timestamp
+ */
+static void udp_sock_fill_data_v6(struct ctx *c, int n, union epoll_ref ref,
+				  int *msg_idx, int *msg_bufs, ssize_t *msg_len,
+				  struct timespec *now)
+{
+	struct msghdr *mh = &udp6_l2_mh_tap[*msg_idx].msg_hdr;
+	struct udp6_l2_buf_t *b = &udp6_l2_buf[n];
+	size_t ip_len, buf_len;
+	struct in6_addr *src;
+	in_port_t src_port;
+
+	src = &b->s_in6.sin6_addr;
+	src_port = ntohs(b->s_in6.sin6_port);
+
+	ip_len = udp6_l2_mh_sock[n].msg_len + sizeof(b->ip6h) + sizeof(b->uh);
+	b->ip6h.payload_len = htons(udp6_l2_mh_sock[n].msg_len + sizeof(b->uh));
+
+	if (IN6_IS_ADDR_LINKLOCAL(src)) {
+		b->ip6h.daddr = c->addr6_ll_seen;
+		b->ip6h.saddr = b->s_in6.sin6_addr;
+	} else if (IN6_IS_ADDR_LOOPBACK(src)			||
+		   IN6_ARE_ADDR_EQUAL(src, &c->addr6_seen)	||
+		   IN6_ARE_ADDR_EQUAL(src, &c->addr6)) {
+		b->ip6h.daddr = c->addr6_ll_seen;
+
+		if (IN6_IS_ADDR_LINKLOCAL(&c->gw6))
+			b->ip6h.saddr = c->gw6;
+		else
+			b->ip6h.saddr = c->addr6_ll;
+
+		udp_tap_map[V6][src_port].ts_local = now->tv_sec;
+
+		if (IN6_IS_ADDR_LOOPBACK(src))
+			udp_tap_map[V6][src_port].loopback = 1;
+		else
+			udp_tap_map[V6][src_port].loopback = 0;
+
+		if (IN6_ARE_ADDR_EQUAL(src, &c->addr6))
+			udp_tap_map[V6][src_port].gua = 1;
+		else
+			udp_tap_map[V6][src_port].gua = 0;
+
+		bitmap_set(udp_act[V6][UDP_ACT_TAP], src_port);
+	} else if (!IN6_IS_ADDR_UNSPECIFIED(&c->dns6_fwd) &&
+		   IN6_ARE_ADDR_EQUAL(src, &c->dns6[0]) && src_port == 53) {
+		b->ip6h.daddr = c->addr6_seen;
+		b->ip6h.saddr = c->dns6_fwd;
+	} else {
+		b->ip6h.daddr = c->addr6_seen;
+		b->ip6h.saddr = b->s_in6.sin6_addr;
+	}
+
+	b->uh.source = b->s_in6.sin6_port;
+	b->uh.dest = htons(ref.r.p.udp.udp.port);
+	b->uh.len = b->ip6h.payload_len;
+
+	/* Header fields are altered for the checksum only, restored below */
+	b->ip6h.hop_limit = IPPROTO_UDP;
+	b->ip6h.version = b->ip6h.nexthdr = b->uh.check = 0;
+	b->uh.check = csum(&b->ip6h, ip_len, 0);
+	b->ip6h.version = 6;
+	b->ip6h.nexthdr = IPPROTO_UDP;
+	b->ip6h.hop_limit = 255;
+
+	if (c->mode == MODE_PASTA) {
+		if (write(c->fd_tap, &b->eh, sizeof(b->eh) + ip_len) < 0)
+			debug("tap write: %s", strerror(errno));
+		pcap((char *)&b->eh, sizeof(b->eh) + ip_len);
+		return;
+	}
+
+	b->vnet_len = htonl(ip_len + sizeof(struct ethhdr));
+	buf_len = sizeof(uint32_t) + sizeof(struct ethhdr) + ip_len;
+	udp6_l2_iov_tap[n].iov_len = buf_len;
+
+	/* With bigger messages, qemu closes the connection. */
+	if (*msg_bufs && *msg_len + buf_len > SHRT_MAX) {
+		mh->msg_iovlen = *msg_bufs;
+
+		(*msg_idx)++;
+		udp6_l2_mh_tap[*msg_idx].msg_hdr.msg_iov = &udp6_l2_iov_tap[n];
+		*msg_len = *msg_bufs = 0;
+	}
+
+	*msg_len += buf_len;
+	(*msg_bufs)++;
+}
+
 /**
  * udp_sock_handler() - Handle new data from socket
  * @c:		Execution context
@@ -668,10 +839,10 @@ static void udp_sock_handler_splice(struct ctx *c, union epoll_ref ref,
 void udp_sock_handler(struct ctx *c, union epoll_ref ref, uint32_t events,
 		      struct timespec *now)
 {
-	int iov_in_msg, msg_i = 0, ret;
-	ssize_t n, msglen, missing = 0;
+	ssize_t n, msg_len = 0, missing = 0;
+	int msg_bufs = 0, msg_i = 0, ret;
 	struct mmsghdr *tap_mmh;
-	struct msghdr *cur_mh;
+	struct msghdr *last_mh;
 	unsigned int i;
 
 	if (events == EPOLLERR)
@@ -687,183 +858,34 @@ void udp_sock_handler(struct ctx *c, union epoll_ref ref, uint32_t events,
 		if (n <= 0)
 			return;
 
-		cur_mh = &udp6_l2_mh_tap[msg_i].msg_hdr;
-		cur_mh->msg_iov = &udp6_l2_iov_tap[0];
-		msg_i = msglen = iov_in_msg = 0;
+		udp6_l2_mh_tap[0].msg_hdr.msg_iov = &udp6_l2_iov_tap[0];
 
 		for (i = 0; i < (unsigned)n; i++) {
-			struct udp6_l2_buf_t *b = &udp6_l2_buf[i];
-			size_t ip_len, iov_len;
-
-			ip_len = udp6_l2_mh_sock[i].msg_len +
-				 sizeof(b->ip6h) + sizeof(b->uh);
-
-			b->ip6h.payload_len = htons(udp6_l2_mh_sock[i].msg_len +
-						    sizeof(b->uh));
-
-			if (IN6_IS_ADDR_LINKLOCAL(&b->s_in6.sin6_addr)) {
-				b->ip6h.daddr = c->addr6_ll_seen;
-				b->ip6h.saddr = b->s_in6.sin6_addr;
-			} else if (IN6_IS_ADDR_LOOPBACK(&b->s_in6.sin6_addr) ||
-				   !memcmp(&b->s_in6.sin6_addr, &c->addr6_seen,
-					   sizeof(c->addr6)) ||
-				   !memcmp(&b->s_in6.sin6_addr, &c->addr6,
-					   sizeof(c->addr6))) {
-				in_port_t src = htons(b->s_in6.sin6_port);
-
-				b->ip6h.daddr = c->addr6_ll_seen;
-
-				if (IN6_IS_ADDR_LINKLOCAL(&c->gw6))
-					b->ip6h.saddr = c->gw6;
-				else
-					b->ip6h.saddr = c->addr6_ll;
-
-				udp_tap_map[V6][src].ts_local = now->tv_sec;
-
-				if (IN6_IS_ADDR_LOOPBACK(&b->s_in6.sin6_addr))
-					udp_tap_map[V6][src].loopback = 1;
-				else
-					udp_tap_map[V6][src].loopback = 0;
-
-				if (!memcmp(&b->s_in6.sin6_addr, &c->addr6,
-						 sizeof(c->addr6)))
-					udp_tap_map[V6][src].gua = 1;
-				else
-					udp_tap_map[V6][src].gua = 0;
-
-				bitmap_set(udp_act[V6][UDP_ACT_TAP], src);
-			} else if (!IN6_IS_ADDR_UNSPECIFIED(&c->dns6_fwd) &&
-				   !memcmp(&b->s_in6.sin6_addr, &c->dns6_fwd,
-					   sizeof(c->dns6_fwd)) &&
-				   ntohs(b->s_in6.sin6_port) == 53) {
-				b->ip6h.daddr = c->addr6_seen;
-				b->ip6h.saddr = c->dns6_fwd;
-			} else {
-				b->ip6h.daddr = c->addr6_seen;
-				b->ip6h.saddr = b->s_in6.sin6_addr;
-			}
-
-			b->uh.source = b->s_in6.sin6_port;
-			b->uh.dest = htons(ref.r.p.udp.udp.port);
-			b->uh.len = b->ip6h.payload_len;
-
-			b->ip6h.hop_limit = IPPROTO_UDP;
-			b->ip6h.version = 0;
-			b->ip6h.nexthdr = 0;
-			b->uh.check = 0;
-			b->uh.check = csum(&b->ip6h, ip_len, 0);
-			b->ip6h.version = 6;
-			b->ip6h.nexthdr = IPPROTO_UDP;
-			b->ip6h.hop_limit = 255;
-
-			if (c->mode == MODE_PASTA) {
-				ip_len += sizeof(struct ethhdr);
-				if (write(c->fd_tap, &b->eh, ip_len) < 0)
-					debug("tap write: %s", strerror(errno));
-				pcap((char *)&b->eh, ip_len);
-				continue;
-			}
-
-			b->vnet_len = htonl(ip_len + sizeof(struct ethhdr));
-			iov_len = sizeof(uint32_t) + sizeof(struct ethhdr) +
-				  ip_len;
-			udp6_l2_iov_tap[i].iov_len = iov_len;
-
-			/* With bigger messages, qemu closes the connection. */
-			if (iov_in_msg && msglen + iov_len > SHRT_MAX) {
-				cur_mh->msg_iovlen = iov_in_msg;
-
-				cur_mh = &udp6_l2_mh_tap[++msg_i].msg_hdr;
-				msglen = iov_in_msg = 0;
-				cur_mh->msg_iov = &udp6_l2_iov_tap[i];
-			}
-
-			msglen += iov_len;
-			iov_in_msg++;
+			udp_sock_fill_data_v6(c, i, ref,
+					      &msg_i, &msg_bufs, &msg_len, now);
 		}
 
+		udp6_l2_mh_tap[msg_i].msg_hdr.msg_iovlen = msg_bufs;
 		tap_mmh = udp6_l2_mh_tap;
 	} else {
 		n = recvmmsg(ref.r.s, udp4_l2_mh_sock, UDP_TAP_FRAMES, 0, NULL);
 		if (n <= 0)
 			return;
 
-		cur_mh = &udp4_l2_mh_tap[msg_i].msg_hdr;
-		cur_mh->msg_iov = &udp4_l2_iov_tap[0];
-		msg_i = msglen = iov_in_msg = 0;
+		udp4_l2_mh_tap[0].msg_hdr.msg_iov = &udp4_l2_iov_tap[0];
 
 		for (i = 0; i < (unsigned)n; i++) {
-			struct udp4_l2_buf_t *b = &udp4_l2_buf[i];
-			size_t ip_len, iov_len;
-			in_addr_t s_addr;
-
-			ip_len = udp4_l2_mh_sock[i].msg_len +
-				 sizeof(b->iph) + sizeof(b->uh);
-
-			b->iph.tot_len = htons(ip_len);
-
-			s_addr = ntohl(b->s_in.sin_addr.s_addr);
-			if (s_addr >> IN_CLASSA_NSHIFT == IN_LOOPBACKNET ||
-			    s_addr == INADDR_ANY ||
-			    s_addr == ntohl(c->addr4_seen)) {
-				in_port_t src = htons(b->s_in.sin_port);
-
-				b->iph.saddr = c->gw4;
-				udp_tap_map[V4][src].ts_local = now->tv_sec;
-
-				if (b->s_in.sin_addr.s_addr == c->addr4_seen)
-					udp_tap_map[V4][src].loopback = 0;
-				else
-					udp_tap_map[V4][src].loopback = 1;
-
-				bitmap_set(udp_act[V4][UDP_ACT_TAP], src);
-			} else if (c->dns4_fwd &&
-				   s_addr == ntohl(c->dns4[0]) &&
-				   ntohs(b->s_in.sin_port) == 53) {
-				b->iph.saddr = c->dns4_fwd;
-			} else {
-				b->iph.saddr = b->s_in.sin_addr.s_addr;
-			}
-
-			udp_update_check4(b);
-			b->uh.source = b->s_in.sin_port;
-			b->uh.dest = htons(ref.r.p.udp.udp.port);
-			b->uh.len = ntohs(udp4_l2_mh_sock[i].msg_len +
-					  sizeof(b->uh));
-
-			if (c->mode == MODE_PASTA) {
-				ip_len += sizeof(struct ethhdr);
-				if (write(c->fd_tap, &b->eh, ip_len) < 0)
-					debug("tap write: %s", strerror(errno));
-				pcap((char *)&b->eh, ip_len);
-				continue;
-			}
-
-			b->vnet_len = htonl(ip_len + sizeof(struct ethhdr));
-			iov_len = sizeof(uint32_t) + sizeof(struct ethhdr) +
-				  ip_len;
-			udp4_l2_iov_tap[i].iov_len = iov_len;
-
-			/* With bigger messages, qemu closes the connection. */
-			if (iov_in_msg && msglen + iov_len > SHRT_MAX) {
-				cur_mh->msg_iovlen = iov_in_msg;
-
-				cur_mh = &udp4_l2_mh_tap[++msg_i].msg_hdr;
-				msglen = iov_in_msg = 0;
-				cur_mh->msg_iov = &udp4_l2_iov_tap[i];
-			}
-
-			msglen += iov_len;
-			iov_in_msg++;
+			udp_sock_fill_data_v4(c, i, ref,
+					      &msg_i, &msg_bufs, &msg_len, now);
 		}
 
+		udp4_l2_mh_tap[msg_i].msg_hdr.msg_iovlen = msg_bufs;
 		tap_mmh = udp4_l2_mh_tap;
 	}
 
 	if (c->mode == MODE_PASTA)
 		return;
 
-	cur_mh->msg_iovlen = iov_in_msg;
 	ret = sendmmsg(c->fd_tap, tap_mmh, msg_i + 1,
 		       MSG_NOSIGNAL | MSG_DONTWAIT);
 	if (ret <= 0)
@@ -887,25 +909,25 @@ void udp_sock_handler(struct ctx *c, union epoll_ref ref, uint32_t events,
 	 *
 	 * re-send everything from here:		   ^--  -----   ------
 	 */
-	cur_mh = &tap_mmh[ret - 1].msg_hdr;
-	for (i = 0, msglen = 0; i < cur_mh->msg_iovlen; i++) {
+	last_mh = &tap_mmh[ret - 1].msg_hdr;
+	for (i = 0, msg_len = 0; i < last_mh->msg_iovlen; i++) {
 		if (missing <= 0) {
-			msglen += cur_mh->msg_iov[i].iov_len;
-			missing = msglen - tap_mmh[ret - 1].msg_len;
+			msg_len += last_mh->msg_iov[i].iov_len;
+			missing = msg_len - tap_mmh[ret - 1].msg_len;
 		}
 
 		if (missing > 0) {
 			uint8_t **iov_base;
 			int first_offset;
 
-			iov_base = (uint8_t **)&cur_mh->msg_iov[i].iov_base;
-			first_offset = cur_mh->msg_iov[i].iov_len - missing;
+			iov_base = (uint8_t **)&last_mh->msg_iov[i].iov_base;
+			first_offset = last_mh->msg_iov[i].iov_len - missing;
 			*iov_base += first_offset;
-			cur_mh->msg_iov[i].iov_len = missing;
+			last_mh->msg_iov[i].iov_len = missing;
 
-			cur_mh->msg_iov = &cur_mh->msg_iov[i];
+			last_mh->msg_iov = &last_mh->msg_iov[i];
 
-			sendmsg(c->fd_tap, cur_mh, MSG_NOSIGNAL);
+			sendmsg(c->fd_tap, last_mh, MSG_NOSIGNAL);
 
 			*iov_base -= first_offset;
 			break;
@@ -997,7 +1019,7 @@ int udp_tap_handler(struct ctx *c, int af, void *addr,
 		sa = (struct sockaddr *)&s_in6;
 		sl = sizeof(s_in6);
 
-		if (!memcmp(addr, &c->gw6, sizeof(c->gw6)) && !c->no_map_gw) {
+		if (IN6_ARE_ADDR_EQUAL(addr, &c->gw6) && !c->no_map_gw) {
 			if (!udp_tap_map[V6][dst].ts_local ||
 			    udp_tap_map[V6][dst].loopback)
 				s_in6.sin6_addr = in6addr_loopback;
@@ -1005,7 +1027,7 @@ int udp_tap_handler(struct ctx *c, int af, void *addr,
 				s_in6.sin6_addr = c->addr6;
 			else
 				s_in6.sin6_addr = c->addr6_seen;
-		} else if (!memcmp(addr, &c->dns6_fwd, sizeof(c->dns6_fwd)) &&
+		} else if (IN6_ARE_ADDR_EQUAL(addr, &c->dns6_fwd) &&
 			   ntohs(s_in6.sin6_port) == 53) {
 			s_in6.sin6_addr = c->dns6[0];
 		} else if (IN6_IS_ADDR_LINKLOCAL(&s_in6.sin6_addr)) {
-- 
2.35.1


  parent reply	other threads:[~2022-03-25 22:52 UTC|newest]

Thread overview: 25+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-03-25 22:52 [PATCH 00/24] Boundary-checked "packets", TCP timerfd timeouts, assorted fixes Stefano Brivio
2022-03-25 22:52 ` [PATCH 01/24] conf, util, tap: Implement --trace option for extra verbose logging Stefano Brivio
2022-03-25 22:52 ` [PATCH 02/24] pcap: Fix mistake in printed string Stefano Brivio
2022-03-25 22:52 ` [PATCH 03/24] util: Drop CHECK_SET_MIN_MAX{,_PROTO_FD} macros Stefano Brivio
2022-03-25 22:52 ` [PATCH 04/24] util: Use standard int types Stefano Brivio
2022-03-25 22:52 ` [PATCH 05/24] tcp: Refactor to use events instead of states, split out spliced implementation Stefano Brivio
2022-03-25 22:52 ` [PATCH 06/24] test/lib/video: Fill in href attributes of video shortcuts Stefano Brivio
2022-03-25 22:52 ` [PATCH 07/24] udp: Drop _splice from recv, send, sendto static buffer names Stefano Brivio
2022-03-25 22:52 ` Stefano Brivio [this message]
2022-03-25 22:52 ` [PATCH 09/24] dhcpv6, tap, tcp: Use IN6_ARE_ADDR_EQUAL instead of open-coded memcmp() Stefano Brivio
2022-03-25 22:52 ` [PATCH 10/24] udp: Use flags for local, loopback, and configured unicast binds Stefano Brivio
2022-03-25 22:52 ` [PATCH 11/24] Makefile: Enable a few hardening flags Stefano Brivio
2022-03-25 22:52 ` [PATCH 12/24] test: Add asciinema(1) as requirement for CI in README Stefano Brivio
2022-03-25 22:52 ` [PATCH 13/24] test, seccomp, Makefile: Switch to valgrind runs for passt functional tests Stefano Brivio
2022-03-25 22:52 ` [PATCH 14/24] tcp, udp, util: Enforce 24-bit limit on socket numbers Stefano Brivio
2022-03-25 22:52 ` [PATCH 15/24] tcp: Rework timers to use timerfd instead of periodic bitmap scan Stefano Brivio
2022-03-25 22:52 ` [PATCH 16/24] tcp_splice: Close sockets right away on high number of open files Stefano Brivio
2022-03-25 22:52 ` [PATCH 17/24] test/perf: Work-around for virtio_net hang before long streams from guest Stefano Brivio
2022-03-25 22:52 ` [PATCH 18/24] README: Avoid "here" links Stefano Brivio
2022-03-25 22:52 ` [PATCH 19/24] README: Update Interfaces and Availability sections Stefano Brivio
2022-03-25 22:52 ` [PATCH 20/24] tcp: Fit struct tcp_conn into a single 64-byte cacheline Stefano Brivio
2022-03-25 22:52 ` [PATCH 21/24] dhcp: Minimum option length implied by RFC 951 is 60 bytes, not 62 Stefano Brivio
2022-03-25 22:52 ` [PATCH 22/24] tcp, tcp_splice: Use less awkward syntax to swap in/out sockets from pools Stefano Brivio
2022-03-25 22:52 ` [PATCH 23/24] util: Fix function declaration style of write_pidfile() Stefano Brivio
2022-03-25 22:53 ` [PATCH 24/24] treewide: Packet abstraction with mandatory boundary checks Stefano Brivio

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20220325225300.2803584-9-sbrivio@redhat.com \
    --to=sbrivio@redhat.com \
    --cc=passt-dev@passt.top \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
Code repositories for project(s) associated with this public inbox

	https://passt.top/passt

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for IMAP folder(s).