* [PATCH 1/8] tcp, tap: Correctly advance through packets in tcp_tap_handler()
2023-09-08 1:49 [PATCH 0/8] Fix a number of bugs with handling of TCP packets from tap David Gibson
@ 2023-09-08 1:49 ` David Gibson
2023-09-08 1:49 ` [PATCH 2/8] udp, tap: Correctly advance through packets in udp_tap_handler() David Gibson
` (7 subsequent siblings)
8 siblings, 0 replies; 10+ messages in thread
From: David Gibson @ 2023-09-08 1:49 UTC (permalink / raw)
To: passt-dev, Stefano Brivio; +Cc: jlesev, David Gibson
In both tap4_handler() and tap6_handler(), once we've sorted incoming l3
packets into "sequences", we then step through all the packets in each TCP
sequence calling tcp_tap_handler(). Or so it appears.
In fact, tcp_tap_handler() doesn't take an index and always looks at packet
0 of the sequence, except when it calls tcp_data_from_tap() to process
data packets. It appears to be written with the idea that the struct pool
is a queue, from which it consumes packets as it processes them, but that's
not how the pool data structure works - they are more like an array of
packets.
We only get away with this, because setup packets for TCP tend to come in
separate batches (because we need to reply in between) and so we only get
a bunch of packets for the same connection together when they're data
packets (tcp_data_from_tap() has its own loop through packets).
Correct this by adding an index parameter to tcp_tap_handler() and altering
the loops in tap.c to step through the pool properly.
Link: https://bugs.passt.top/show_bug.cgi?id=68
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
---
tap.c | 25 +++++++++++++++++--------
tcp.c | 28 +++++++++++++++-------------
tcp.h | 2 +-
3 files changed, 33 insertions(+), 22 deletions(-)
diff --git a/tap.c b/tap.c
index 8d7859c..445a5ca 100644
--- a/tap.c
+++ b/tap.c
@@ -707,16 +707,20 @@ append:
for (j = 0, seq = tap4_l4; j < seq_count; j++, seq++) {
struct pool *p = (struct pool *)&seq->p;
- size_t n = p->count;
- tap_packet_debug(NULL, NULL, seq, 0, NULL, n);
+ tap_packet_debug(NULL, NULL, seq, 0, NULL, p->count);
if (seq->protocol == IPPROTO_TCP) {
+ size_t k;
+
if (c->no_tcp)
continue;
- while ((n -= tcp_tap_handler(c, AF_INET, &seq->saddr,
- &seq->daddr, p, now)));
+ for (k = 0; k < p->count; )
+ k += tcp_tap_handler(c, AF_INET, &seq->saddr,
+ &seq->daddr, p, k, now);
} else if (seq->protocol == IPPROTO_UDP) {
+ size_t n = p->count;
+
if (c->no_udp)
continue;
while ((n -= udp_tap_handler(c, AF_INET, &seq->saddr,
@@ -868,16 +872,21 @@ append:
for (j = 0, seq = tap6_l4; j < seq_count; j++, seq++) {
struct pool *p = (struct pool *)&seq->p;
- size_t n = p->count;
- tap_packet_debug(NULL, NULL, NULL, seq->protocol, seq, n);
+ tap_packet_debug(NULL, NULL, NULL, seq->protocol, seq,
+ p->count);
if (seq->protocol == IPPROTO_TCP) {
+ size_t k;
+
if (c->no_tcp)
continue;
- while ((n -= tcp_tap_handler(c, AF_INET6, &seq->saddr,
- &seq->daddr, p, now)));
+ for (k = 0; k < p->count; )
+ k += tcp_tap_handler(c, AF_INET6, &seq->saddr,
+ &seq->daddr, p, k, now);
} else if (seq->protocol == IPPROTO_UDP) {
+ size_t n = p->count;
+
if (c->no_udp)
continue;
while ((n -= udp_tap_handler(c, AF_INET6, &seq->saddr,
diff --git a/tcp.c b/tcp.c
index c89e6e4..d8c2327 100644
--- a/tcp.c
+++ b/tcp.c
@@ -2294,11 +2294,12 @@ err:
* @c: Execution context
* @conn: Connection pointer
* @p: Pool of TCP packets, with TCP headers
+ * @idx: Index of first data packet in pool
*
* #syscalls sendmsg
*/
static void tcp_data_from_tap(struct ctx *c, struct tcp_tap_conn *conn,
- const struct pool *p)
+ const struct pool *p, int idx)
{
int i, iov_i, ack = 0, fin = 0, retr = 0, keep = -1, partial_send = 0;
uint16_t max_ack_seq_wnd = conn->wnd_from_tap;
@@ -2313,7 +2314,7 @@ static void tcp_data_from_tap(struct ctx *c, struct tcp_tap_conn *conn,
ASSERT(conn->events & ESTABLISHED);
- for (i = 0, iov_i = 0; i < (int)p->count; i++) {
+ for (i = idx, iov_i = 0; i < (int)p->count; i++) {
uint32_t seq, seq_offset, ack_seq;
struct tcphdr *th;
char *data;
@@ -2530,12 +2531,13 @@ static void tcp_conn_from_sock_finish(struct ctx *c, struct tcp_tap_conn *conn,
* @saddr: Source address
* @daddr: Destination address
* @p: Pool of TCP packets, with TCP headers
+ * @idx: Index of first packet in pool to process
* @now: Current timestamp
*
* Return: count of consumed packets
*/
int tcp_tap_handler(struct ctx *c, int af, const void *saddr, const void *daddr,
- const struct pool *p, const struct timespec *now)
+ const struct pool *p, int idx, const struct timespec *now)
{
struct tcp_tap_conn *conn;
size_t optlen, len;
@@ -2543,17 +2545,17 @@ int tcp_tap_handler(struct ctx *c, int af, const void *saddr, const void *daddr,
int ack_due = 0;
char *opts;
- if (!packet_get(p, 0, 0, 0, &len))
+ if (!packet_get(p, idx, 0, 0, &len))
return 1;
- th = packet_get(p, 0, 0, sizeof(*th), NULL);
+ th = packet_get(p, idx, 0, sizeof(*th), NULL);
if (!th)
return 1;
optlen = th->doff * 4UL - sizeof(*th);
/* Static checkers might fail to see this: */
optlen = MIN(optlen, ((1UL << 4) /* from doff width */ - 6) * 4UL);
- opts = packet_get(p, 0, sizeof(*th), optlen, NULL);
+ opts = packet_get(p, idx, sizeof(*th), optlen, NULL);
conn = tcp_hash_lookup(c, af, daddr, htons(th->source), htons(th->dest));
@@ -2569,7 +2571,7 @@ int tcp_tap_handler(struct ctx *c, int af, const void *saddr, const void *daddr,
if (th->rst) {
conn_event(c, conn, CLOSED);
- return p->count;
+ return p->count - idx;
}
if (th->ack && !(conn->events & ESTABLISHED))
@@ -2591,7 +2593,7 @@ int tcp_tap_handler(struct ctx *c, int af, const void *saddr, const void *daddr,
if (conn->events & TAP_SYN_RCVD) {
if (!(conn->events & TAP_SYN_ACK_SENT)) {
tcp_rst(c, conn);
- return p->count;
+ return p->count - idx;
}
conn_event(c, conn, ESTABLISHED);
@@ -2603,19 +2605,19 @@ int tcp_tap_handler(struct ctx *c, int af, const void *saddr, const void *daddr,
tcp_send_flag(c, conn, ACK);
conn_event(c, conn, SOCK_FIN_SENT);
- return p->count;
+ return p->count - idx;
}
if (!th->ack) {
tcp_rst(c, conn);
- return p->count;
+ return p->count - idx;
}
tcp_clamp_window(c, conn, ntohs(th->window));
tcp_data_from_sock(c, conn);
- if (p->count == 1)
+ if (p->count - idx == 1)
return 1;
}
@@ -2631,7 +2633,7 @@ int tcp_tap_handler(struct ctx *c, int af, const void *saddr, const void *daddr,
}
/* Established connections accepting data from tap */
- tcp_data_from_tap(c, conn, p);
+ tcp_data_from_tap(c, conn, p, idx);
if (conn->seq_ack_to_tap != conn->seq_from_tap)
ack_due = 1;
@@ -2645,7 +2647,7 @@ int tcp_tap_handler(struct ctx *c, int af, const void *saddr, const void *daddr,
if (ack_due)
conn_flag(c, conn, ACK_TO_TAP_DUE);
- return p->count;
+ return p->count - idx;
}
/**
diff --git a/tcp.h b/tcp.h
index 9eaec3f..6444d6a 100644
--- a/tcp.h
+++ b/tcp.h
@@ -18,7 +18,7 @@ void tcp_listen_handler(struct ctx *c, union epoll_ref ref,
const struct timespec *now);
void tcp_sock_handler(struct ctx *c, union epoll_ref ref, uint32_t events);
int tcp_tap_handler(struct ctx *c, int af, const void *saddr, const void *daddr,
- const struct pool *p, const struct timespec *now);
+ const struct pool *p, int idx, const struct timespec *now);
int tcp_sock_init(const struct ctx *c, sa_family_t af, const void *addr,
const char *ifname, in_port_t port);
int tcp_init(struct ctx *c);
--
@@ -18,7 +18,7 @@ void tcp_listen_handler(struct ctx *c, union epoll_ref ref,
const struct timespec *now);
void tcp_sock_handler(struct ctx *c, union epoll_ref ref, uint32_t events);
int tcp_tap_handler(struct ctx *c, int af, const void *saddr, const void *daddr,
- const struct pool *p, const struct timespec *now);
+ const struct pool *p, int idx, const struct timespec *now);
int tcp_sock_init(const struct ctx *c, sa_family_t af, const void *addr,
const char *ifname, in_port_t port);
int tcp_init(struct ctx *c);
--
2.41.0
^ permalink raw reply related [flat|nested] 10+ messages in thread
* [PATCH 2/8] udp, tap: Correctly advance through packets in udp_tap_handler()
2023-09-08 1:49 [PATCH 0/8] Fix a number of bugs with handling of TCP packets from tap David Gibson
2023-09-08 1:49 ` [PATCH 1/8] tcp, tap: Correctly advance through packets in tcp_tap_handler() David Gibson
@ 2023-09-08 1:49 ` David Gibson
2023-09-08 1:49 ` [PATCH 3/8] tcp: Remove some redundant packet_get() operations David Gibson
` (6 subsequent siblings)
8 siblings, 0 replies; 10+ messages in thread
From: David Gibson @ 2023-09-08 1:49 UTC (permalink / raw)
To: passt-dev, Stefano Brivio; +Cc: jlesev, David Gibson
In both tap4_handler() and tap6_handler(), once we've sorted incoming l3
packets into "sequences", we then step through all the packets in each DUP
sequence calling udp_tap_handler(). Or so it appears.
In fact, udp_tap_handler() doesn't take an index and always starts with
packet 0 of the sequence, even if called repeatedly. It appears to be
written with the idea that the struct pool is a queue, from which it
consumes packets as it processes them, but that's not how the pool data
structure works.
Correct this by adding an index parameter to udp_tap_handler() and altering
the loops in tap.c to step through the pool properly.
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
---
tap.c | 20 ++++++++------------
udp.c | 15 ++++++++-------
udp.h | 2 +-
3 files changed, 17 insertions(+), 20 deletions(-)
diff --git a/tap.c b/tap.c
index 445a5ca..93db989 100644
--- a/tap.c
+++ b/tap.c
@@ -707,24 +707,22 @@ append:
for (j = 0, seq = tap4_l4; j < seq_count; j++, seq++) {
struct pool *p = (struct pool *)&seq->p;
+ size_t k;
tap_packet_debug(NULL, NULL, seq, 0, NULL, p->count);
if (seq->protocol == IPPROTO_TCP) {
- size_t k;
-
if (c->no_tcp)
continue;
for (k = 0; k < p->count; )
k += tcp_tap_handler(c, AF_INET, &seq->saddr,
&seq->daddr, p, k, now);
} else if (seq->protocol == IPPROTO_UDP) {
- size_t n = p->count;
-
if (c->no_udp)
continue;
- while ((n -= udp_tap_handler(c, AF_INET, &seq->saddr,
- &seq->daddr, p, now)));
+ for (k = 0; k < p->count; )
+ k += udp_tap_handler(c, AF_INET, &seq->saddr,
+ &seq->daddr, p, k, now);
}
}
@@ -872,25 +870,23 @@ append:
for (j = 0, seq = tap6_l4; j < seq_count; j++, seq++) {
struct pool *p = (struct pool *)&seq->p;
+ size_t k;
tap_packet_debug(NULL, NULL, NULL, seq->protocol, seq,
p->count);
if (seq->protocol == IPPROTO_TCP) {
- size_t k;
-
if (c->no_tcp)
continue;
for (k = 0; k < p->count; )
k += tcp_tap_handler(c, AF_INET6, &seq->saddr,
&seq->daddr, p, k, now);
} else if (seq->protocol == IPPROTO_UDP) {
- size_t n = p->count;
-
if (c->no_udp)
continue;
- while ((n -= udp_tap_handler(c, AF_INET6, &seq->saddr,
- &seq->daddr, p, now)));
+ for (k = 0; k < p->count; )
+ k += udp_tap_handler(c, AF_INET6, &seq->saddr,
+ &seq->daddr, p, k, now);
}
}
diff --git a/udp.c b/udp.c
index f4ed660..ed1b7a5 100644
--- a/udp.c
+++ b/udp.c
@@ -789,6 +789,7 @@ void udp_sock_handler(struct ctx *c, union epoll_ref ref, uint32_t events,
* @saddr: Source address
* @daddr: Destination address
* @p: Pool of UDP packets, with UDP headers
+ * @idx: Index of first packet to process
* @now: Current timestamp
*
* Return: count of consumed packets
@@ -796,7 +797,7 @@ void udp_sock_handler(struct ctx *c, union epoll_ref ref, uint32_t events,
* #syscalls sendmmsg
*/
int udp_tap_handler(struct ctx *c, int af, const void *saddr, const void *daddr,
- const struct pool *p, const struct timespec *now)
+ const struct pool *p, int idx, const struct timespec *now)
{
struct mmsghdr mm[UIO_MAXIOV];
struct iovec m[UIO_MAXIOV];
@@ -811,7 +812,7 @@ int udp_tap_handler(struct ctx *c, int af, const void *saddr, const void *daddr,
(void)c;
(void)saddr;
- uh = packet_get(p, 0, 0, sizeof(*uh), NULL);
+ uh = packet_get(p, idx, 0, sizeof(*uh), NULL);
if (!uh)
return 1;
@@ -859,7 +860,7 @@ int udp_tap_handler(struct ctx *c, int af, const void *saddr, const void *daddr,
s = sock_l4(c, AF_INET, IPPROTO_UDP, &bind_addr,
bind_if, src, uref.u32);
if (s < 0)
- return p->count;
+ return p->count - idx;
udp_tap_map[V4][src].sock = s;
bitmap_set(udp_act[V4][UDP_ACT_TAP], src);
@@ -909,7 +910,7 @@ int udp_tap_handler(struct ctx *c, int af, const void *saddr, const void *daddr,
s = sock_l4(c, AF_INET6, IPPROTO_UDP, bind_addr,
bind_if, src, uref.u32);
if (s < 0)
- return p->count;
+ return p->count - idx;
udp_tap_map[V6][src].sock = s;
bitmap_set(udp_act[V6][UDP_ACT_TAP], src);
@@ -918,13 +919,13 @@ int udp_tap_handler(struct ctx *c, int af, const void *saddr, const void *daddr,
udp_tap_map[V6][src].ts = now->tv_sec;
}
- for (i = 0; i < (int)p->count; i++) {
+ for (i = 0; i < (int)p->count - idx; i++) {
struct udphdr *uh_send;
size_t len;
- uh_send = packet_get(p, i, 0, sizeof(*uh), &len);
+ uh_send = packet_get(p, idx + i, 0, sizeof(*uh), &len);
if (!uh_send)
- return p->count;
+ return p->count - idx;
mm[i].msg_hdr.msg_name = sa;
mm[i].msg_hdr.msg_namelen = sl;
diff --git a/udp.h b/udp.h
index f553de2..0ee0695 100644
--- a/udp.h
+++ b/udp.h
@@ -11,7 +11,7 @@
void udp_sock_handler(struct ctx *c, union epoll_ref ref, uint32_t events,
const struct timespec *now);
int udp_tap_handler(struct ctx *c, int af, const void *saddr, const void *daddr,
- const struct pool *p, const struct timespec *now);
+ const struct pool *p, int idx, const struct timespec *now);
int udp_sock_init(const struct ctx *c, int ns, sa_family_t af,
const void *addr, const char *ifname, in_port_t port);
int udp_init(struct ctx *c);
--
@@ -11,7 +11,7 @@
void udp_sock_handler(struct ctx *c, union epoll_ref ref, uint32_t events,
const struct timespec *now);
int udp_tap_handler(struct ctx *c, int af, const void *saddr, const void *daddr,
- const struct pool *p, const struct timespec *now);
+ const struct pool *p, int idx, const struct timespec *now);
int udp_sock_init(const struct ctx *c, int ns, sa_family_t af,
const void *addr, const char *ifname, in_port_t port);
int udp_init(struct ctx *c);
--
2.41.0
^ permalink raw reply related [flat|nested] 10+ messages in thread
* [PATCH 3/8] tcp: Remove some redundant packet_get() operations
2023-09-08 1:49 [PATCH 0/8] Fix a number of bugs with handling of TCP packets from tap David Gibson
2023-09-08 1:49 ` [PATCH 1/8] tcp, tap: Correctly advance through packets in tcp_tap_handler() David Gibson
2023-09-08 1:49 ` [PATCH 2/8] udp, tap: Correctly advance through packets in udp_tap_handler() David Gibson
@ 2023-09-08 1:49 ` David Gibson
2023-09-08 1:49 ` [PATCH 4/8] tcp: Never hash match closed connections David Gibson
` (5 subsequent siblings)
8 siblings, 0 replies; 10+ messages in thread
From: David Gibson @ 2023-09-08 1:49 UTC (permalink / raw)
To: passt-dev, Stefano Brivio; +Cc: jlesev, David Gibson
Both tcp_data_from_tap() and tcp_tap_handler() call packet_get() to get
the entire L4 packet length, then immediately call it again to check that
the packet is long enough to include a TCP header. The features of
packet_get() let us easily combine these together, we just need to adjust
the length slightly, because we want the value to include the TCP header
length.
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
---
tcp.c | 14 ++++----------
1 file changed, 4 insertions(+), 10 deletions(-)
diff --git a/tcp.c b/tcp.c
index d8c2327..6a34f82 100644
--- a/tcp.c
+++ b/tcp.c
@@ -2320,16 +2320,12 @@ static void tcp_data_from_tap(struct ctx *c, struct tcp_tap_conn *conn,
char *data;
size_t off;
- if (!packet_get(p, i, 0, 0, &len)) {
- tcp_rst(c, conn);
- return;
- }
-
- th = packet_get(p, i, 0, sizeof(*th), NULL);
+ th = packet_get(p, i, 0, sizeof(*th), &len);
if (!th) {
tcp_rst(c, conn);
return;
}
+ len += sizeof(*th);
off = th->doff * 4UL;
if (off < sizeof(*th) || off > len) {
@@ -2545,12 +2541,10 @@ int tcp_tap_handler(struct ctx *c, int af, const void *saddr, const void *daddr,
int ack_due = 0;
char *opts;
- if (!packet_get(p, idx, 0, 0, &len))
- return 1;
-
- th = packet_get(p, idx, 0, sizeof(*th), NULL);
+ th = packet_get(p, idx, 0, sizeof(*th), &len);
if (!th)
return 1;
+ len += sizeof(*th);
optlen = th->doff * 4UL - sizeof(*th);
/* Static checkers might fail to see this: */
--
@@ -2320,16 +2320,12 @@ static void tcp_data_from_tap(struct ctx *c, struct tcp_tap_conn *conn,
char *data;
size_t off;
- if (!packet_get(p, i, 0, 0, &len)) {
- tcp_rst(c, conn);
- return;
- }
-
- th = packet_get(p, i, 0, sizeof(*th), NULL);
+ th = packet_get(p, i, 0, sizeof(*th), &len);
if (!th) {
tcp_rst(c, conn);
return;
}
+ len += sizeof(*th);
off = th->doff * 4UL;
if (off < sizeof(*th) || off > len) {
@@ -2545,12 +2541,10 @@ int tcp_tap_handler(struct ctx *c, int af, const void *saddr, const void *daddr,
int ack_due = 0;
char *opts;
- if (!packet_get(p, idx, 0, 0, &len))
- return 1;
-
- th = packet_get(p, idx, 0, sizeof(*th), NULL);
+ th = packet_get(p, idx, 0, sizeof(*th), &len);
if (!th)
return 1;
+ len += sizeof(*th);
optlen = th->doff * 4UL - sizeof(*th);
/* Static checkers might fail to see this: */
--
2.41.0
^ permalink raw reply related [flat|nested] 10+ messages in thread
* [PATCH 4/8] tcp: Never hash match closed connections
2023-09-08 1:49 [PATCH 0/8] Fix a number of bugs with handling of TCP packets from tap David Gibson
` (2 preceding siblings ...)
2023-09-08 1:49 ` [PATCH 3/8] tcp: Remove some redundant packet_get() operations David Gibson
@ 2023-09-08 1:49 ` David Gibson
2023-09-08 1:49 ` [PATCH 5/8] tcp: Return consumed packet count from tcp_data_from_tap() David Gibson
` (4 subsequent siblings)
8 siblings, 0 replies; 10+ messages in thread
From: David Gibson @ 2023-09-08 1:49 UTC (permalink / raw)
To: passt-dev, Stefano Brivio; +Cc: jlesev, David Gibson
From a practical point of view, when a TCP connection ends, whether by
FIN or by RST, we set the CLOSED event, then some time later we remove the
connection from the hash table and clean it up. However, from a protocol
point of view, once it's closed, it's gone, and any new packets with
matching addresses and ports are either forming a new connection, or are
invalid packets to discard.
Enforce these semantics in the TCP hash logic by never hash matching closed
connections.
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
---
tcp.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/tcp.c b/tcp.c
index 6a34f82..5592998 100644
--- a/tcp.c
+++ b/tcp.c
@@ -1146,7 +1146,7 @@ static int tcp_hash_match(const struct tcp_tap_conn *conn,
const union inany_addr *faddr,
in_port_t eport, in_port_t fport)
{
- if (inany_equals(&conn->faddr, faddr) &&
+ if (conn->events != CLOSED && inany_equals(&conn->faddr, faddr) &&
conn->eport == eport && conn->fport == fport)
return 1;
--
@@ -1146,7 +1146,7 @@ static int tcp_hash_match(const struct tcp_tap_conn *conn,
const union inany_addr *faddr,
in_port_t eport, in_port_t fport)
{
- if (inany_equals(&conn->faddr, faddr) &&
+ if (conn->events != CLOSED && inany_equals(&conn->faddr, faddr) &&
conn->eport == eport && conn->fport == fport)
return 1;
--
2.41.0
^ permalink raw reply related [flat|nested] 10+ messages in thread
* [PATCH 5/8] tcp: Return consumed packet count from tcp_data_from_tap()
2023-09-08 1:49 [PATCH 0/8] Fix a number of bugs with handling of TCP packets from tap David Gibson
` (3 preceding siblings ...)
2023-09-08 1:49 ` [PATCH 4/8] tcp: Never hash match closed connections David Gibson
@ 2023-09-08 1:49 ` David Gibson
2023-09-08 1:49 ` [PATCH 6/8] tcp: Correctly handle RST followed rapidly by SYN David Gibson
` (3 subsequent siblings)
8 siblings, 0 replies; 10+ messages in thread
From: David Gibson @ 2023-09-08 1:49 UTC (permalink / raw)
To: passt-dev, Stefano Brivio; +Cc: jlesev, David Gibson
Currently tcp_data_from_tap() is assumed to consume all packets remaining
in the packet pool it is given. However there are some edge cases where
that's not correct. In preparation for fixing those, change it to return
a count of packets consumed and use that in its caller.
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
---
tcp.c | 25 +++++++++++++++----------
1 file changed, 15 insertions(+), 10 deletions(-)
diff --git a/tcp.c b/tcp.c
index 5592998..34c27f0 100644
--- a/tcp.c
+++ b/tcp.c
@@ -2297,8 +2297,10 @@ err:
* @idx: Index of first data packet in pool
*
* #syscalls sendmsg
+ *
+ * Return: count of consumed packets
*/
-static void tcp_data_from_tap(struct ctx *c, struct tcp_tap_conn *conn,
+static int tcp_data_from_tap(struct ctx *c, struct tcp_tap_conn *conn,
const struct pool *p, int idx)
{
int i, iov_i, ack = 0, fin = 0, retr = 0, keep = -1, partial_send = 0;
@@ -2310,7 +2312,7 @@ static void tcp_data_from_tap(struct ctx *c, struct tcp_tap_conn *conn,
ssize_t n;
if (conn->events == CLOSED)
- return;
+ return p->count - idx;
ASSERT(conn->events & ESTABLISHED);
@@ -2323,19 +2325,19 @@ static void tcp_data_from_tap(struct ctx *c, struct tcp_tap_conn *conn,
th = packet_get(p, i, 0, sizeof(*th), &len);
if (!th) {
tcp_rst(c, conn);
- return;
+ return p->count - idx;
}
len += sizeof(*th);
off = th->doff * 4UL;
if (off < sizeof(*th) || off > len) {
tcp_rst(c, conn);
- return;
+ return p->count - idx;
}
if (th->rst) {
conn_event(c, conn, CLOSED);
- return;
+ return p->count - idx;
}
len -= off;
@@ -2446,10 +2448,10 @@ eintr:
if (errno == EAGAIN || errno == EWOULDBLOCK) {
tcp_send_flag(c, conn, ACK_IF_NEEDED);
- return;
+ return p->count - idx;
}
tcp_rst(c, conn);
- return;
+ return p->count - idx;
}
if (n < (int)(seq_from_tap - conn->seq_from_tap)) {
@@ -2470,7 +2472,7 @@ out:
conn->seq_dup_ack_approx = conn->seq_from_tap & 0xff;
tcp_send_flag(c, conn, DUP_ACK);
}
- return;
+ return p->count - idx;
}
if (ack && conn->events & TAP_FIN_SENT &&
@@ -2484,6 +2486,8 @@ out:
} else {
tcp_send_flag(c, conn, ACK_IF_NEEDED);
}
+
+ return p->count - idx;
}
/**
@@ -2540,6 +2544,7 @@ int tcp_tap_handler(struct ctx *c, int af, const void *saddr, const void *daddr,
struct tcphdr *th;
int ack_due = 0;
char *opts;
+ int count;
th = packet_get(p, idx, 0, sizeof(*th), &len);
if (!th)
@@ -2627,7 +2632,7 @@ int tcp_tap_handler(struct ctx *c, int af, const void *saddr, const void *daddr,
}
/* Established connections accepting data from tap */
- tcp_data_from_tap(c, conn, p, idx);
+ count = tcp_data_from_tap(c, conn, p, idx);
if (conn->seq_ack_to_tap != conn->seq_from_tap)
ack_due = 1;
@@ -2641,7 +2646,7 @@ int tcp_tap_handler(struct ctx *c, int af, const void *saddr, const void *daddr,
if (ack_due)
conn_flag(c, conn, ACK_TO_TAP_DUE);
- return p->count - idx;
+ return count;
}
/**
--
@@ -2297,8 +2297,10 @@ err:
* @idx: Index of first data packet in pool
*
* #syscalls sendmsg
+ *
+ * Return: count of consumed packets
*/
-static void tcp_data_from_tap(struct ctx *c, struct tcp_tap_conn *conn,
+static int tcp_data_from_tap(struct ctx *c, struct tcp_tap_conn *conn,
const struct pool *p, int idx)
{
int i, iov_i, ack = 0, fin = 0, retr = 0, keep = -1, partial_send = 0;
@@ -2310,7 +2312,7 @@ static void tcp_data_from_tap(struct ctx *c, struct tcp_tap_conn *conn,
ssize_t n;
if (conn->events == CLOSED)
- return;
+ return p->count - idx;
ASSERT(conn->events & ESTABLISHED);
@@ -2323,19 +2325,19 @@ static void tcp_data_from_tap(struct ctx *c, struct tcp_tap_conn *conn,
th = packet_get(p, i, 0, sizeof(*th), &len);
if (!th) {
tcp_rst(c, conn);
- return;
+ return p->count - idx;
}
len += sizeof(*th);
off = th->doff * 4UL;
if (off < sizeof(*th) || off > len) {
tcp_rst(c, conn);
- return;
+ return p->count - idx;
}
if (th->rst) {
conn_event(c, conn, CLOSED);
- return;
+ return p->count - idx;
}
len -= off;
@@ -2446,10 +2448,10 @@ eintr:
if (errno == EAGAIN || errno == EWOULDBLOCK) {
tcp_send_flag(c, conn, ACK_IF_NEEDED);
- return;
+ return p->count - idx;
}
tcp_rst(c, conn);
- return;
+ return p->count - idx;
}
if (n < (int)(seq_from_tap - conn->seq_from_tap)) {
@@ -2470,7 +2472,7 @@ out:
conn->seq_dup_ack_approx = conn->seq_from_tap & 0xff;
tcp_send_flag(c, conn, DUP_ACK);
}
- return;
+ return p->count - idx;
}
if (ack && conn->events & TAP_FIN_SENT &&
@@ -2484,6 +2486,8 @@ out:
} else {
tcp_send_flag(c, conn, ACK_IF_NEEDED);
}
+
+ return p->count - idx;
}
/**
@@ -2540,6 +2544,7 @@ int tcp_tap_handler(struct ctx *c, int af, const void *saddr, const void *daddr,
struct tcphdr *th;
int ack_due = 0;
char *opts;
+ int count;
th = packet_get(p, idx, 0, sizeof(*th), &len);
if (!th)
@@ -2627,7 +2632,7 @@ int tcp_tap_handler(struct ctx *c, int af, const void *saddr, const void *daddr,
}
/* Established connections accepting data from tap */
- tcp_data_from_tap(c, conn, p, idx);
+ count = tcp_data_from_tap(c, conn, p, idx);
if (conn->seq_ack_to_tap != conn->seq_from_tap)
ack_due = 1;
@@ -2641,7 +2646,7 @@ int tcp_tap_handler(struct ctx *c, int af, const void *saddr, const void *daddr,
if (ack_due)
conn_flag(c, conn, ACK_TO_TAP_DUE);
- return p->count - idx;
+ return count;
}
/**
--
2.41.0
^ permalink raw reply related [flat|nested] 10+ messages in thread
* [PATCH 6/8] tcp: Correctly handle RST followed rapidly by SYN
2023-09-08 1:49 [PATCH 0/8] Fix a number of bugs with handling of TCP packets from tap David Gibson
` (4 preceding siblings ...)
2023-09-08 1:49 ` [PATCH 5/8] tcp: Return consumed packet count from tcp_data_from_tap() David Gibson
@ 2023-09-08 1:49 ` David Gibson
2023-09-08 1:49 ` [PATCH 7/8] tcp: Consolidate paths where we initiate reset on tap interface David Gibson
` (2 subsequent siblings)
8 siblings, 0 replies; 10+ messages in thread
From: David Gibson @ 2023-09-08 1:49 UTC (permalink / raw)
To: passt-dev, Stefano Brivio; +Cc: jlesev, David Gibson
Although it's unlikely in practice, the guest could theoretically
reset one TCP connection then immediately start a new one with the
same addressses and ports, such that we get an RST then a SYN in the
same batch of received packets in tcp_tap_handler().
We don't correctly handle that unlikely case, because when we receive
the RST, we discard any remaining packets in the batch so we'd never
see the SYN. This could happen in either tcp_tap_handler() or
tcp_data_from_tap(). Correct that by returning 1, so that the caller
will continue calling tcp_tap_handler() on subsequent packets allowing
us to process any subsequent SYN.
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
---
tcp.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/tcp.c b/tcp.c
index 34c27f0..a1b5a72 100644
--- a/tcp.c
+++ b/tcp.c
@@ -2337,7 +2337,7 @@ static int tcp_data_from_tap(struct ctx *c, struct tcp_tap_conn *conn,
if (th->rst) {
conn_event(c, conn, CLOSED);
- return p->count - idx;
+ return 1;
}
len -= off;
@@ -2570,7 +2570,7 @@ int tcp_tap_handler(struct ctx *c, int af, const void *saddr, const void *daddr,
if (th->rst) {
conn_event(c, conn, CLOSED);
- return p->count - idx;
+ return 1;
}
if (th->ack && !(conn->events & ESTABLISHED))
--
@@ -2337,7 +2337,7 @@ static int tcp_data_from_tap(struct ctx *c, struct tcp_tap_conn *conn,
if (th->rst) {
conn_event(c, conn, CLOSED);
- return p->count - idx;
+ return 1;
}
len -= off;
@@ -2570,7 +2570,7 @@ int tcp_tap_handler(struct ctx *c, int af, const void *saddr, const void *daddr,
if (th->rst) {
conn_event(c, conn, CLOSED);
- return p->count - idx;
+ return 1;
}
if (th->ack && !(conn->events & ESTABLISHED))
--
2.41.0
^ permalink raw reply related [flat|nested] 10+ messages in thread
* [PATCH 7/8] tcp: Consolidate paths where we initiate reset on tap interface
2023-09-08 1:49 [PATCH 0/8] Fix a number of bugs with handling of TCP packets from tap David Gibson
` (5 preceding siblings ...)
2023-09-08 1:49 ` [PATCH 6/8] tcp: Correctly handle RST followed rapidly by SYN David Gibson
@ 2023-09-08 1:49 ` David Gibson
2023-09-08 1:49 ` [PATCH 8/8] tcp: Correct handling of FIN,ACK followed by SYN David Gibson
2023-09-08 15:27 ` [PATCH 0/8] Fix a number of bugs with handling of TCP packets from tap Stefano Brivio
8 siblings, 0 replies; 10+ messages in thread
From: David Gibson @ 2023-09-08 1:49 UTC (permalink / raw)
To: passt-dev, Stefano Brivio; +Cc: jlesev, David Gibson
There are a number of conditions where we will issue a TCP RST in response
to something unexpected we received from the tap interface. These occur in
both tcp_data_from_tap() and tcp_tap_handler(). In tcp_tap_handler() use
a 'goto out of line' technique to consolidate all these paths into one
place. For the tcp_data_from_tap() cases use a negative return code and
direct that to the same path in tcp_tap_handler(), its caller.
In this case we want to discard all remaining packets in the batch we have
received: even if they're otherwise good, they'll be invalidated when the
guest receives the RST we're sending. This is subtly different from the
case where we *receive* an RST, where we could in theory get a new SYN
immediately afterwards. Clarify that with a common on the now common
reset path.
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
---
tcp.c | 47 +++++++++++++++++++++++++----------------------
1 file changed, 25 insertions(+), 22 deletions(-)
diff --git a/tcp.c b/tcp.c
index a1b5a72..c76df73 100644
--- a/tcp.c
+++ b/tcp.c
@@ -2323,17 +2323,13 @@ static int tcp_data_from_tap(struct ctx *c, struct tcp_tap_conn *conn,
size_t off;
th = packet_get(p, i, 0, sizeof(*th), &len);
- if (!th) {
- tcp_rst(c, conn);
- return p->count - idx;
- }
+ if (!th)
+ return -1;
len += sizeof(*th);
off = th->doff * 4UL;
- if (off < sizeof(*th) || off > len) {
- tcp_rst(c, conn);
- return p->count - idx;
- }
+ if (off < sizeof(*th) || off > len)
+ return -1;
if (th->rst) {
conn_event(c, conn, CLOSED);
@@ -2449,9 +2445,9 @@ eintr:
if (errno == EAGAIN || errno == EWOULDBLOCK) {
tcp_send_flag(c, conn, ACK_IF_NEEDED);
return p->count - idx;
+
}
- tcp_rst(c, conn);
- return p->count - idx;
+ return -1;
}
if (n < (int)(seq_from_tap - conn->seq_from_tap)) {
@@ -2580,20 +2576,18 @@ int tcp_tap_handler(struct ctx *c, int af, const void *saddr, const void *daddr,
/* Establishing connection from socket */
if (conn->events & SOCK_ACCEPTED) {
- if (th->syn && th->ack && !th->fin)
+ if (th->syn && th->ack && !th->fin) {
tcp_conn_from_sock_finish(c, conn, th, opts, optlen);
- else
- tcp_rst(c, conn);
+ return 1;
+ }
- return 1;
+ goto reset;
}
/* Establishing connection from tap */
if (conn->events & TAP_SYN_RCVD) {
- if (!(conn->events & TAP_SYN_ACK_SENT)) {
- tcp_rst(c, conn);
- return p->count - idx;
- }
+ if (!(conn->events & TAP_SYN_ACK_SENT))
+ goto reset;
conn_event(c, conn, ESTABLISHED);
@@ -2607,10 +2601,8 @@ int tcp_tap_handler(struct ctx *c, int af, const void *saddr, const void *daddr,
return p->count - idx;
}
- if (!th->ack) {
- tcp_rst(c, conn);
- return p->count - idx;
- }
+ if (!th->ack)
+ goto reset;
tcp_clamp_window(c, conn, ntohs(th->window));
@@ -2633,6 +2625,9 @@ int tcp_tap_handler(struct ctx *c, int af, const void *saddr, const void *daddr,
/* Established connections accepting data from tap */
count = tcp_data_from_tap(c, conn, p, idx);
+ if (count == -1)
+ goto reset;
+
if (conn->seq_ack_to_tap != conn->seq_from_tap)
ack_due = 1;
@@ -2647,6 +2642,14 @@ int tcp_tap_handler(struct ctx *c, int af, const void *saddr, const void *daddr,
conn_flag(c, conn, ACK_TO_TAP_DUE);
return count;
+
+reset:
+ /* Something's gone wrong, so reset the connection. We discard
+ * remaining packets in the batch, since they'd be invalidated when our
+ * RST is received, even if otherwise good.
+ */
+ tcp_rst(c, conn);
+ return p->count - idx;
}
/**
--
@@ -2323,17 +2323,13 @@ static int tcp_data_from_tap(struct ctx *c, struct tcp_tap_conn *conn,
size_t off;
th = packet_get(p, i, 0, sizeof(*th), &len);
- if (!th) {
- tcp_rst(c, conn);
- return p->count - idx;
- }
+ if (!th)
+ return -1;
len += sizeof(*th);
off = th->doff * 4UL;
- if (off < sizeof(*th) || off > len) {
- tcp_rst(c, conn);
- return p->count - idx;
- }
+ if (off < sizeof(*th) || off > len)
+ return -1;
if (th->rst) {
conn_event(c, conn, CLOSED);
@@ -2449,9 +2445,9 @@ eintr:
if (errno == EAGAIN || errno == EWOULDBLOCK) {
tcp_send_flag(c, conn, ACK_IF_NEEDED);
return p->count - idx;
+
}
- tcp_rst(c, conn);
- return p->count - idx;
+ return -1;
}
if (n < (int)(seq_from_tap - conn->seq_from_tap)) {
@@ -2580,20 +2576,18 @@ int tcp_tap_handler(struct ctx *c, int af, const void *saddr, const void *daddr,
/* Establishing connection from socket */
if (conn->events & SOCK_ACCEPTED) {
- if (th->syn && th->ack && !th->fin)
+ if (th->syn && th->ack && !th->fin) {
tcp_conn_from_sock_finish(c, conn, th, opts, optlen);
- else
- tcp_rst(c, conn);
+ return 1;
+ }
- return 1;
+ goto reset;
}
/* Establishing connection from tap */
if (conn->events & TAP_SYN_RCVD) {
- if (!(conn->events & TAP_SYN_ACK_SENT)) {
- tcp_rst(c, conn);
- return p->count - idx;
- }
+ if (!(conn->events & TAP_SYN_ACK_SENT))
+ goto reset;
conn_event(c, conn, ESTABLISHED);
@@ -2607,10 +2601,8 @@ int tcp_tap_handler(struct ctx *c, int af, const void *saddr, const void *daddr,
return p->count - idx;
}
- if (!th->ack) {
- tcp_rst(c, conn);
- return p->count - idx;
- }
+ if (!th->ack)
+ goto reset;
tcp_clamp_window(c, conn, ntohs(th->window));
@@ -2633,6 +2625,9 @@ int tcp_tap_handler(struct ctx *c, int af, const void *saddr, const void *daddr,
/* Established connections accepting data from tap */
count = tcp_data_from_tap(c, conn, p, idx);
+ if (count == -1)
+ goto reset;
+
if (conn->seq_ack_to_tap != conn->seq_from_tap)
ack_due = 1;
@@ -2647,6 +2642,14 @@ int tcp_tap_handler(struct ctx *c, int af, const void *saddr, const void *daddr,
conn_flag(c, conn, ACK_TO_TAP_DUE);
return count;
+
+reset:
+ /* Something's gone wrong, so reset the connection. We discard
+ * remaining packets in the batch, since they'd be invalidated when our
+ * RST is received, even if otherwise good.
+ */
+ tcp_rst(c, conn);
+ return p->count - idx;
}
/**
--
2.41.0
^ permalink raw reply related [flat|nested] 10+ messages in thread
* [PATCH 8/8] tcp: Correct handling of FIN,ACK followed by SYN
2023-09-08 1:49 [PATCH 0/8] Fix a number of bugs with handling of TCP packets from tap David Gibson
` (6 preceding siblings ...)
2023-09-08 1:49 ` [PATCH 7/8] tcp: Consolidate paths where we initiate reset on tap interface David Gibson
@ 2023-09-08 1:49 ` David Gibson
2023-09-08 15:27 ` [PATCH 0/8] Fix a number of bugs with handling of TCP packets from tap Stefano Brivio
8 siblings, 0 replies; 10+ messages in thread
From: David Gibson @ 2023-09-08 1:49 UTC (permalink / raw)
To: passt-dev, Stefano Brivio; +Cc: jlesev, David Gibson
When the guest tries to establish a connection, it could give up on it by
sending a FIN,ACK instead of a plain ACK to our SYN,ACK. It could then
make a new attempt to establish a connection with the same addresses and
ports with a new SYN.
Although it's unlikely, it could send the 2nd SYN very shortly after the
FIN,ACK resulting in both being received in the same batch of packets from
the tap interface.
Currently, we don't handle that correctly, when we receive a FIN,ACK on a
not fully established connection we discard the remaining packets in the
batch, and so will never process the 2nd SYN. Correct this by returning
1 from tcp_tap_handler() in this case, so we'll just consume the FIN,ACK
and continue to process the rest of the batch.
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
---
tcp.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/tcp.c b/tcp.c
index c76df73..dd3142d 100644
--- a/tcp.c
+++ b/tcp.c
@@ -2598,7 +2598,7 @@ int tcp_tap_handler(struct ctx *c, int af, const void *saddr, const void *daddr,
tcp_send_flag(c, conn, ACK);
conn_event(c, conn, SOCK_FIN_SENT);
- return p->count - idx;
+ return 1;
}
if (!th->ack)
--
@@ -2598,7 +2598,7 @@ int tcp_tap_handler(struct ctx *c, int af, const void *saddr, const void *daddr,
tcp_send_flag(c, conn, ACK);
conn_event(c, conn, SOCK_FIN_SENT);
- return p->count - idx;
+ return 1;
}
if (!th->ack)
--
2.41.0
^ permalink raw reply related [flat|nested] 10+ messages in thread
* Re: [PATCH 0/8] Fix a number of bugs with handling of TCP packets from tap
2023-09-08 1:49 [PATCH 0/8] Fix a number of bugs with handling of TCP packets from tap David Gibson
` (7 preceding siblings ...)
2023-09-08 1:49 ` [PATCH 8/8] tcp: Correct handling of FIN,ACK followed by SYN David Gibson
@ 2023-09-08 15:27 ` Stefano Brivio
8 siblings, 0 replies; 10+ messages in thread
From: Stefano Brivio @ 2023-09-08 15:27 UTC (permalink / raw)
To: David Gibson; +Cc: passt-dev, jlesev
On Fri, 8 Sep 2023 11:49:45 +1000
David Gibson <david@gibson.dropbear.id.au> wrote:
> In the course of investigating bug 68, I discovered a number of pretty
> serious bugs in how we handle various cases in tcp_tap_handler() and
> tcp_data_from_tap(). This series fixes a number of them.
>
> Note that while I'm pretty sure the bugs fixed here are real, I
> haven't yet positively traced how they lead to the symptoms in bug 68
> - I'm still waiting on the results from some special instrumentation
> to track that down.
>
> Link: https://bugs.passt.top/show_bug.cgi?id=68
>
> David Gibson (8):
> tcp, tap: Correctly advance through packets in tcp_tap_handler()
> udp, tap: Correctly advance through packets in udp_tap_handler()
> tcp: Remove some redundant packet_get() operations
> tcp: Never hash match closed connections
> tcp: Return consumed packet count from tcp_data_from_tap()
> tcp: Correctly handle RST followed rapidly by SYN
> tcp: Consolidate paths where we initiate reset on tap interface
> tcp: Correct handling of FIN,ACK followed by SYN
Series applied, thanks.
--
Stefano
^ permalink raw reply [flat|nested] 10+ messages in thread