From mboxrd@z Thu Jan 1 00:00:00 1970 From: Stefano Brivio To: passt-dev@passt.top Subject: [PATCH 08/24] udp: Split buffer queueing/writing parts of udp_sock_handler() Date: Fri, 25 Mar 2022 23:52:44 +0100 Message-ID: <20220325225300.2803584-9-sbrivio@redhat.com> In-Reply-To: <20220325225300.2803584-1-sbrivio@redhat.com> MIME-Version: 1.0 Content-Type: multipart/mixed; boundary="===============5188370086197651157==" --===============5188370086197651157== Content-Type: text/plain; charset="utf-8" Content-Transfer-Encoding: quoted-printable ...it became too hard to follow: split it off to udp_sock_fill_data_v{4,6}. While at it, use IN6_ARE_ADDR_EQUAL(a, b), courtesy of netinet/in.h, instead of open-coded memcmp(). Signed-off-by: Stefano Brivio --- udp.c | 364 +++++++++++++++++++++++++++++++--------------------------- 1 file changed, 193 insertions(+), 171 deletions(-) diff --git a/udp.c b/udp.c index ce536a6..ebbcda1 100644 --- a/udp.c +++ b/udp.c @@ -655,6 +655,177 @@ static void udp_sock_handler_splice(struct ctx *c, unio= n epoll_ref ref, sendmmsg(s, udp_mmh_sendto, n, MSG_NOSIGNAL); } =20 +/** + * udp_sock_fill_data_v4() - Fill and queue one buffer. 
In pasta mode, write= it + * @c: Execution context + * @n: Index of buffer in udp4_l2_buf pool + * @ref: epoll reference from socket + * @msg_idx: Index within message being prepared (spans multiple buffers) + * @msg_len: Length of current message being prepared for sending + * @now: Current timestamp + */ +static void udp_sock_fill_data_v4(struct ctx *c, int n, union epoll_ref ref, + int *msg_idx, int *msg_bufs, ssize_t *msg_len, + struct timespec *now) +{ + struct msghdr *mh =3D &udp4_l2_mh_tap[*msg_idx].msg_hdr; + struct udp4_l2_buf_t *b =3D &udp4_l2_buf[n]; + size_t ip_len, buf_len; + in_port_t src_port; + in_addr_t src; + + ip_len =3D udp4_l2_mh_sock[n].msg_len + sizeof(b->iph) + sizeof(b->uh); + + b->iph.tot_len =3D htons(ip_len); + + src =3D ntohl(b->s_in.sin_addr.s_addr); + src_port =3D ntohs(b->s_in.sin_port); + + if (src >> IN_CLASSA_NSHIFT =3D=3D IN_LOOPBACKNET || + src =3D=3D INADDR_ANY || src =3D=3D ntohl(c->addr4_seen)) { + b->iph.saddr =3D c->gw4; + udp_tap_map[V4][src_port].ts_local =3D now->tv_sec; + + if (b->s_in.sin_addr.s_addr =3D=3D c->addr4_seen) + udp_tap_map[V4][src_port].loopback =3D 0; + else + udp_tap_map[V4][src_port].loopback =3D 1; + + bitmap_set(udp_act[V4][UDP_ACT_TAP], src_port); + } else if (c->dns4_fwd && + src =3D=3D ntohl(c->dns4[0]) && src_port =3D=3D 53) { + b->iph.saddr =3D c->dns4_fwd; + } else { + b->iph.saddr =3D b->s_in.sin_addr.s_addr; + } + + udp_update_check4(b); + b->uh.source =3D b->s_in.sin_port; + b->uh.dest =3D htons(ref.r.p.udp.udp.port); + b->uh.len =3D htons(udp4_l2_mh_sock[n].msg_len + sizeof(b->uh)); + + if (c->mode =3D=3D MODE_PASTA) { + if (write(c->fd_tap, &b->eh, sizeof(b->eh) + ip_len) < 0) + debug("tap write: %s", strerror(errno)); + pcap((char *)&b->eh, sizeof(b->eh) + ip_len); + + return; + } + + b->vnet_len =3D htonl(ip_len + sizeof(struct ethhdr)); + buf_len =3D sizeof(uint32_t) + sizeof(struct ethhdr) + ip_len; + udp4_l2_iov_tap[n].iov_len =3D buf_len; + + /* With bigger messages, qemu 
closes the connection. */ + if (*msg_bufs && *msg_len + buf_len > SHRT_MAX) { + mh->msg_iovlen =3D *msg_bufs; + + (*msg_idx)++; + udp4_l2_mh_tap[*msg_idx].msg_hdr.msg_iov =3D &udp4_l2_iov_tap[n]; + *msg_len =3D *msg_bufs =3D 0; + } + + *msg_len +=3D buf_len; + (*msg_bufs)++; +} + +/** + * udp_sock_fill_data_v6() - Fill and queue one buffer. In pasta mode, write= it + * @c: Execution context + * @n: Index of buffer in udp6_l2_buf pool + * @ref: epoll reference from socket + * @msg_idx: Index within message being prepared (spans multiple buffers) + * @msg_len: Length of current message being prepared for sending + * @now: Current timestamp + */ +static void udp_sock_fill_data_v6(struct ctx *c, int n, union epoll_ref ref, + int *msg_idx, int *msg_bufs, ssize_t *msg_len, + struct timespec *now) +{ + struct msghdr *mh =3D &udp6_l2_mh_tap[*msg_idx].msg_hdr; + struct udp6_l2_buf_t *b =3D &udp6_l2_buf[n]; + size_t ip_len, buf_len; + struct in6_addr *src; + in_port_t src_port; + + src =3D &b->s_in6.sin6_addr; + src_port =3D ntohs(b->s_in6.sin6_port); + + ip_len =3D udp6_l2_mh_sock[n].msg_len + sizeof(b->ip6h) + sizeof(b->uh); + + b->ip6h.payload_len =3D htons(udp6_l2_mh_sock[n].msg_len + sizeof(b->uh)); + + if (IN6_IS_ADDR_LINKLOCAL(src)) { + b->ip6h.daddr =3D c->addr6_ll_seen; + b->ip6h.saddr =3D b->s_in6.sin6_addr; + } else if (IN6_IS_ADDR_LOOPBACK(src) || + IN6_ARE_ADDR_EQUAL(src, &c->addr6_seen) || + IN6_ARE_ADDR_EQUAL(src, &c->addr6)) { + b->ip6h.daddr =3D c->addr6_ll_seen; + + if (IN6_IS_ADDR_LINKLOCAL(&c->gw6)) + b->ip6h.saddr =3D c->gw6; + else + b->ip6h.saddr =3D c->addr6_ll; + + udp_tap_map[V6][src_port].ts_local =3D now->tv_sec; + + if (IN6_IS_ADDR_LOOPBACK(src)) + udp_tap_map[V6][src_port].loopback =3D 1; + else + udp_tap_map[V6][src_port].loopback =3D 0; + + if (IN6_ARE_ADDR_EQUAL(src, &c->addr6)) + udp_tap_map[V6][src_port].gua =3D 1; + else + udp_tap_map[V6][src_port].gua =3D 0; + + bitmap_set(udp_act[V6][UDP_ACT_TAP], src_port); + } else if 
(!IN6_IS_ADDR_UNSPECIFIED(&c->dns6_fwd) && + IN6_ARE_ADDR_EQUAL(src, &c->dns6_fwd) && src_port =3D=3D 53) { + b->ip6h.daddr =3D c->addr6_seen; + b->ip6h.saddr =3D c->dns6_fwd; + } else { + b->ip6h.daddr =3D c->addr6_seen; + b->ip6h.saddr =3D b->s_in6.sin6_addr; + } + + b->uh.source =3D b->s_in6.sin6_port; + b->uh.dest =3D htons(ref.r.p.udp.udp.port); + b->uh.len =3D b->ip6h.payload_len; + + b->ip6h.hop_limit =3D IPPROTO_UDP; + b->ip6h.version =3D b->ip6h.nexthdr =3D b->uh.check =3D 0; + b->uh.check =3D csum(&b->ip6h, ip_len, 0); + b->ip6h.version =3D 6; + b->ip6h.nexthdr =3D IPPROTO_UDP; + b->ip6h.hop_limit =3D 255; + + if (c->mode =3D=3D MODE_PASTA) { + if (write(c->fd_tap, &b->eh, sizeof(b->eh) + ip_len) < 0) + debug("tap write: %s", strerror(errno)); + pcap((char *)&b->eh, sizeof(b->eh) + ip_len); + + return; + } + + b->vnet_len =3D htonl(ip_len + sizeof(struct ethhdr)); + buf_len =3D sizeof(uint32_t) + sizeof(struct ethhdr) + ip_len; + udp6_l2_iov_tap[n].iov_len =3D buf_len; + + /* With bigger messages, qemu closes the connection. 
*/ + if (*msg_bufs && *msg_len + buf_len > SHRT_MAX) { + mh->msg_iovlen =3D *msg_bufs; + + (*msg_idx)++; + udp6_l2_mh_tap[*msg_idx].msg_hdr.msg_iov =3D &udp6_l2_iov_tap[n]; + *msg_len =3D *msg_bufs =3D 0; + } + + *msg_len +=3D buf_len; + (*msg_bufs)++; +} + /** * udp_sock_handler() - Handle new data from socket * @c: Execution context @@ -668,10 +839,10 @@ static void udp_sock_handler_splice(struct ctx *c, unio= n epoll_ref ref, void udp_sock_handler(struct ctx *c, union epoll_ref ref, uint32_t events, struct timespec *now) { - int iov_in_msg, msg_i =3D 0, ret; - ssize_t n, msglen, missing =3D 0; + ssize_t n, msg_len =3D 0, missing =3D 0; + int msg_bufs =3D 0, msg_i =3D 0, ret; struct mmsghdr *tap_mmh; - struct msghdr *cur_mh; + struct msghdr *last_mh; unsigned int i; =20 if (events =3D=3D EPOLLERR) @@ -687,183 +858,34 @@ void udp_sock_handler(struct ctx *c, union epoll_ref r= ef, uint32_t events, if (n <=3D 0) return; =20 - cur_mh =3D &udp6_l2_mh_tap[msg_i].msg_hdr; - cur_mh->msg_iov =3D &udp6_l2_iov_tap[0]; - msg_i =3D msglen =3D iov_in_msg =3D 0; + udp6_l2_mh_tap[0].msg_hdr.msg_iov =3D &udp6_l2_iov_tap[0]; =20 for (i =3D 0; i < (unsigned)n; i++) { - struct udp6_l2_buf_t *b =3D &udp6_l2_buf[i]; - size_t ip_len, iov_len; - - ip_len =3D udp6_l2_mh_sock[i].msg_len + - sizeof(b->ip6h) + sizeof(b->uh); - - b->ip6h.payload_len =3D htons(udp6_l2_mh_sock[i].msg_len + - sizeof(b->uh)); - - if (IN6_IS_ADDR_LINKLOCAL(&b->s_in6.sin6_addr)) { - b->ip6h.daddr =3D c->addr6_ll_seen; - b->ip6h.saddr =3D b->s_in6.sin6_addr; - } else if (IN6_IS_ADDR_LOOPBACK(&b->s_in6.sin6_addr) || - !memcmp(&b->s_in6.sin6_addr, &c->addr6_seen, - sizeof(c->addr6)) || - !memcmp(&b->s_in6.sin6_addr, &c->addr6, - sizeof(c->addr6))) { - in_port_t src =3D htons(b->s_in6.sin6_port); - - b->ip6h.daddr =3D c->addr6_ll_seen; - - if (IN6_IS_ADDR_LINKLOCAL(&c->gw6)) - b->ip6h.saddr =3D c->gw6; - else - b->ip6h.saddr =3D c->addr6_ll; - - udp_tap_map[V6][src].ts_local =3D now->tv_sec; - - if 
(IN6_IS_ADDR_LOOPBACK(&b->s_in6.sin6_addr)) - udp_tap_map[V6][src].loopback =3D 1; - else - udp_tap_map[V6][src].loopback =3D 0; - - if (!memcmp(&b->s_in6.sin6_addr, &c->addr6, - sizeof(c->addr6))) - udp_tap_map[V6][src].gua =3D 1; - else - udp_tap_map[V6][src].gua =3D 0; - - bitmap_set(udp_act[V6][UDP_ACT_TAP], src); - } else if (!IN6_IS_ADDR_UNSPECIFIED(&c->dns6_fwd) && - !memcmp(&b->s_in6.sin6_addr, &c->dns6_fwd, - sizeof(c->dns6_fwd)) && - ntohs(b->s_in6.sin6_port) =3D=3D 53) { - b->ip6h.daddr =3D c->addr6_seen; - b->ip6h.saddr =3D c->dns6_fwd; - } else { - b->ip6h.daddr =3D c->addr6_seen; - b->ip6h.saddr =3D b->s_in6.sin6_addr; - } - - b->uh.source =3D b->s_in6.sin6_port; - b->uh.dest =3D htons(ref.r.p.udp.udp.port); - b->uh.len =3D b->ip6h.payload_len; - - b->ip6h.hop_limit =3D IPPROTO_UDP; - b->ip6h.version =3D 0; - b->ip6h.nexthdr =3D 0; - b->uh.check =3D 0; - b->uh.check =3D csum(&b->ip6h, ip_len, 0); - b->ip6h.version =3D 6; - b->ip6h.nexthdr =3D IPPROTO_UDP; - b->ip6h.hop_limit =3D 255; - - if (c->mode =3D=3D MODE_PASTA) { - ip_len +=3D sizeof(struct ethhdr); - if (write(c->fd_tap, &b->eh, ip_len) < 0) - debug("tap write: %s", strerror(errno)); - pcap((char *)&b->eh, ip_len); - continue; - } - - b->vnet_len =3D htonl(ip_len + sizeof(struct ethhdr)); - iov_len =3D sizeof(uint32_t) + sizeof(struct ethhdr) + - ip_len; - udp6_l2_iov_tap[i].iov_len =3D iov_len; - - /* With bigger messages, qemu closes the connection. 
*/ - if (iov_in_msg && msglen + iov_len > SHRT_MAX) { - cur_mh->msg_iovlen =3D iov_in_msg; - - cur_mh =3D &udp6_l2_mh_tap[++msg_i].msg_hdr; - msglen =3D iov_in_msg =3D 0; - cur_mh->msg_iov =3D &udp6_l2_iov_tap[i]; - } - - msglen +=3D iov_len; - iov_in_msg++; + udp_sock_fill_data_v6(c, i, ref, + &msg_i, &msg_bufs, &msg_len, now); } =20 + udp6_l2_mh_tap[msg_i].msg_hdr.msg_iovlen =3D msg_bufs; tap_mmh =3D udp6_l2_mh_tap; } else { n =3D recvmmsg(ref.r.s, udp4_l2_mh_sock, UDP_TAP_FRAMES, 0, NULL); if (n <=3D 0) return; =20 - cur_mh =3D &udp4_l2_mh_tap[msg_i].msg_hdr; - cur_mh->msg_iov =3D &udp4_l2_iov_tap[0]; - msg_i =3D msglen =3D iov_in_msg =3D 0; + udp4_l2_mh_tap[0].msg_hdr.msg_iov =3D &udp4_l2_iov_tap[0]; =20 for (i =3D 0; i < (unsigned)n; i++) { - struct udp4_l2_buf_t *b =3D &udp4_l2_buf[i]; - size_t ip_len, iov_len; - in_addr_t s_addr; - - ip_len =3D udp4_l2_mh_sock[i].msg_len + - sizeof(b->iph) + sizeof(b->uh); - - b->iph.tot_len =3D htons(ip_len); - - s_addr =3D ntohl(b->s_in.sin_addr.s_addr); - if (s_addr >> IN_CLASSA_NSHIFT =3D=3D IN_LOOPBACKNET || - s_addr =3D=3D INADDR_ANY || - s_addr =3D=3D ntohl(c->addr4_seen)) { - in_port_t src =3D htons(b->s_in.sin_port); - - b->iph.saddr =3D c->gw4; - udp_tap_map[V4][src].ts_local =3D now->tv_sec; - - if (b->s_in.sin_addr.s_addr =3D=3D c->addr4_seen) - udp_tap_map[V4][src].loopback =3D 0; - else - udp_tap_map[V4][src].loopback =3D 1; - - bitmap_set(udp_act[V4][UDP_ACT_TAP], src); - } else if (c->dns4_fwd && - s_addr =3D=3D ntohl(c->dns4[0]) && - ntohs(b->s_in.sin_port) =3D=3D 53) { - b->iph.saddr =3D c->dns4_fwd; - } else { - b->iph.saddr =3D b->s_in.sin_addr.s_addr; - } - - udp_update_check4(b); - b->uh.source =3D b->s_in.sin_port; - b->uh.dest =3D htons(ref.r.p.udp.udp.port); - b->uh.len =3D ntohs(udp4_l2_mh_sock[i].msg_len + - sizeof(b->uh)); - - if (c->mode =3D=3D MODE_PASTA) { - ip_len +=3D sizeof(struct ethhdr); - if (write(c->fd_tap, &b->eh, ip_len) < 0) - debug("tap write: %s", strerror(errno)); - pcap((char 
*)&b->eh, ip_len); - continue; - } - - b->vnet_len =3D htonl(ip_len + sizeof(struct ethhdr)); - iov_len =3D sizeof(uint32_t) + sizeof(struct ethhdr) + - ip_len; - udp4_l2_iov_tap[i].iov_len =3D iov_len; - - /* With bigger messages, qemu closes the connection. */ - if (iov_in_msg && msglen + iov_len > SHRT_MAX) { - cur_mh->msg_iovlen =3D iov_in_msg; - - cur_mh =3D &udp4_l2_mh_tap[++msg_i].msg_hdr; - msglen =3D iov_in_msg =3D 0; - cur_mh->msg_iov =3D &udp4_l2_iov_tap[i]; - } - - msglen +=3D iov_len; - iov_in_msg++; + udp_sock_fill_data_v4(c, i, ref, + &msg_i, &msg_bufs, &msg_len, now); } =20 + udp4_l2_mh_tap[msg_i].msg_hdr.msg_iovlen =3D msg_bufs; tap_mmh =3D udp4_l2_mh_tap; } =20 if (c->mode =3D=3D MODE_PASTA) return; =20 - cur_mh->msg_iovlen =3D iov_in_msg; ret =3D sendmmsg(c->fd_tap, tap_mmh, msg_i + 1, MSG_NOSIGNAL | MSG_DONTWAIT); if (ret <=3D 0) @@ -887,25 +909,25 @@ void udp_sock_handler(struct ctx *c, union epoll_ref re= f, uint32_t events, * * re-send everything from here: ^-- ----- ------ */ - cur_mh =3D &tap_mmh[ret - 1].msg_hdr; - for (i =3D 0, msglen =3D 0; i < cur_mh->msg_iovlen; i++) { + last_mh =3D &tap_mmh[ret - 1].msg_hdr; + for (i =3D 0, msg_len =3D 0; i < last_mh->msg_iovlen; i++) { if (missing <=3D 0) { - msglen +=3D cur_mh->msg_iov[i].iov_len; - missing =3D msglen - tap_mmh[ret - 1].msg_len; + msg_len +=3D last_mh->msg_iov[i].iov_len; + missing =3D msg_len - tap_mmh[ret - 1].msg_len; } =20 if (missing > 0) { uint8_t **iov_base; int first_offset; =20 - iov_base =3D (uint8_t **)&cur_mh->msg_iov[i].iov_base; - first_offset =3D cur_mh->msg_iov[i].iov_len - missing; + iov_base =3D (uint8_t **)&last_mh->msg_iov[i].iov_base; + first_offset =3D last_mh->msg_iov[i].iov_len - missing; *iov_base +=3D first_offset; - cur_mh->msg_iov[i].iov_len =3D missing; + last_mh->msg_iov[i].iov_len =3D missing; =20 - cur_mh->msg_iov =3D &cur_mh->msg_iov[i]; + last_mh->msg_iov =3D &last_mh->msg_iov[i]; =20 - sendmsg(c->fd_tap, cur_mh, MSG_NOSIGNAL); + sendmsg(c->fd_tap, 
last_mh, MSG_NOSIGNAL); =20 *iov_base -=3D first_offset; break; @@ -997,7 +1019,7 @@ int udp_tap_handler(struct ctx *c, int af, void *addr, sa =3D (struct sockaddr *)&s_in6; sl =3D sizeof(s_in6); =20 - if (!memcmp(addr, &c->gw6, sizeof(c->gw6)) && !c->no_map_gw) { + if (IN6_ARE_ADDR_EQUAL(addr, &c->gw6) && !c->no_map_gw) { if (!udp_tap_map[V6][dst].ts_local || udp_tap_map[V6][dst].loopback) s_in6.sin6_addr =3D in6addr_loopback; @@ -1005,7 +1027,7 @@ int udp_tap_handler(struct ctx *c, int af, void *addr, s_in6.sin6_addr =3D c->addr6; else s_in6.sin6_addr =3D c->addr6_seen; - } else if (!memcmp(addr, &c->dns6_fwd, sizeof(c->dns6_fwd)) && + } else if (IN6_ARE_ADDR_EQUAL(addr, &c->dns6_fwd) && ntohs(s_in6.sin6_port) =3D=3D 53) { s_in6.sin6_addr =3D c->dns6[0]; } else if (IN6_IS_ADDR_LINKLOCAL(&s_in6.sin6_addr)) { --=20 2.35.1 --===============5188370086197651157==--