On Thu, Nov 17, 2022 at 01:15:11AM +0100, Stefano Brivio wrote: > On Wed, 16 Nov 2022 15:42:06 +1100 > David Gibson wrote: > > > This bit in the TCP specific epoll reference indicates whether the > > connection is IPv6 or IPv4. However the sites which refer to it are > > already calling accept() which (optionally) returns an address for the > > remote end of the connection. We can use the sa_family field in that > > address to determine the connection type independent of the epoll > > reference. > > > > This does have a cost: for the spliced case, it means we now need to get > > that address from accept() which introduces an extran copy_to_user(). > > However, in future we want to allow handling IPv4 connectons through IPv6 > > sockets, which means we won't be able to determine the IP version at the > > time we create the listening socket and epoll reference. So, at some point > > we'll have to pay this cost anyway. > > > > Signed-off-by: David Gibson > > --- > > tcp.c | 10 ++++------ > > tcp.h | 2 -- > > tcp_splice.c | 9 ++++----- > > 3 files changed, 8 insertions(+), 13 deletions(-) > > > > diff --git a/tcp.c b/tcp.c > > index 0513b3b..b05ed6c 100644 > > --- a/tcp.c > > +++ b/tcp.c > > @@ -662,8 +662,7 @@ static int tcp_epoll_ctl(const struct ctx *c, struct tcp_tap_conn *conn) > > { > > int m = conn->c.in_epoll ? EPOLL_CTL_MOD : EPOLL_CTL_ADD; > > union epoll_ref ref = { .r.proto = IPPROTO_TCP, .r.s = conn->sock, > > - .r.p.tcp.tcp.index = CONN_IDX(conn), > > - .r.p.tcp.tcp.v6 = CONN_V6(conn) }; > > + .r.p.tcp.tcp.index = CONN_IDX(conn) }; > > struct epoll_event ev = { .data.u64 = ref.u64 }; > > > > if (conn->events == CLOSED) { > > @@ -2745,7 +2744,7 @@ static void tcp_tap_conn_from_sock(struct ctx *c, union epoll_ref ref, > > conn->ws_to_tap = conn->ws_from_tap = 0; > > conn_event(c, conn, SOCK_ACCEPTED); > > > > - if (ref.r.p.tcp.tcp.v6) { > > + if (sa->sa_family == AF_INET6) { > > struct sockaddr_in6 sa6; > > > > memcpy(&sa6, sa, sizeof(sa6)); > > @@ -3019,8 +3018,7 @@ static void tcp_sock_init6(const struct ctx *c, > > in_port_t port) > > { > > in_port_t idx = port + c->tcp.fwd_in.delta[port]; > > - union tcp_epoll_ref tref = { .tcp.listen = 1, .tcp.v6 = 1, > > - .tcp.index = idx }; > > + union tcp_epoll_ref tref = { .tcp.listen = 1, .tcp.index = idx }; > > Excess whitespace (from earlier patch). Fixed > > int s; > > > > s = sock_l4(c, AF_INET6, IPPROTO_TCP, addr, ifname, port, tref.u32); > > @@ -3084,7 +3082,7 @@ static void tcp_ns_sock_init6(const struct ctx *c, in_port_t port) > > { > > in_port_t idx = port + c->tcp.fwd_out.delta[port]; > > union tcp_epoll_ref tref = { .tcp.listen = 1, .tcp.outbound = 1, > > - .tcp.v6 = 1, .tcp.index = idx}; > > + .tcp.index = idx}; > > Missing whitespace (from earlier patch). Fixed. > > int s; > > > > assert(c->mode == MODE_PASTA); > > diff --git a/tcp.h b/tcp.h > > index a940682..739b451 100644 > > --- a/tcp.h > > +++ b/tcp.h > > @@ -33,7 +33,6 @@ void tcp_update_l2_buf(const unsigned char *eth_d, const unsigned char *eth_s, > > * union tcp_epoll_ref - epoll reference portion for TCP connections > > * @listen: Set if this file descriptor is a listening socket > > * @outbound: Listening socket maps to outbound, spliced connection > > - * @v6: Set for IPv6 sockets or connections > > * @timer: Reference is a timerfd descriptor for connection > > * @index: Index of connection in table, or port for bound sockets > > * @u32: Opaque u32 value of reference > > @@ -42,7 +41,6 @@ union tcp_epoll_ref { > > struct { > > uint32_t listen:1, > > outbound:1, > > - v6:1, > > timer:1, > > index:20; > > } tcp; > > diff --git a/tcp_splice.c b/tcp_splice.c > > index 30ab0eb..7c2f667 100644 > > --- a/tcp_splice.c > > +++ b/tcp_splice.c > > @@ -167,11 +167,9 @@ static int tcp_splice_epoll_ctl(const struct ctx *c, > > { > > int m = conn->c.in_epoll ? EPOLL_CTL_MOD : EPOLL_CTL_ADD; > > union epoll_ref ref_a = { .r.proto = IPPROTO_TCP, .r.s = conn->a, > > - .r.p.tcp.tcp.index = CONN_IDX(conn), > > - .r.p.tcp.tcp.v6 = CONN_V6(conn) }; > > + .r.p.tcp.tcp.index = CONN_IDX(conn) }; > > union epoll_ref ref_b = { .r.proto = IPPROTO_TCP, .r.s = conn->b, > > - .r.p.tcp.tcp.index = CONN_IDX(conn), > > - .r.p.tcp.tcp.v6 = CONN_V6(conn) }; > > + .r.p.tcp.tcp.index = CONN_IDX(conn) }; > > struct epoll_event ev_a = { .data.u64 = ref_a.u64 }; > > struct epoll_event ev_b = { .data.u64 = ref_b.u64 }; > > uint32_t events_a, events_b; > > @@ -504,6 +502,7 @@ static void tcp_splice_dir(struct tcp_splice_conn *conn, int ref_sock, > > * tcp_splice_conn_from_sock() - Attempt to init state for a spliced connection > > * @c: Execution context > > * @ref: epoll reference of listening socket > > + * @ipv6: Should this be an IPv6 connection? > > Left-over from previous idea I guess. Yes indeed, fixed. > > * @conn: connection structure to initialize > > * @s: Accepted socket > > * @sa: Peer address of connection > > @@ -517,7 +516,7 @@ bool tcp_splice_conn_from_sock(struct ctx *c, union epoll_ref ref, > > { > > assert(c->mode == MODE_PASTA); > > > > - if (ref.r.p.tcp.tcp.v6) { > > + if (sa->sa_family == AF_INET6) { > > const struct sockaddr_in6 *sa6 > > = (const struct sockaddr_in6 *)sa; > > if (!IN6_IS_ADDR_LOOPBACK(&sa6->sin6_addr)) > -- David Gibson | I'll have my music baroque, and my code david AT gibson.dropbear.id.au | minimalist, thank you. NOT _the_ _other_ | _way_ _around_! http://www.ozlabs.org/~dgibson