public inbox for passt-dev@passt.top
 help / color / mirror / code / Atom feed
blob 70036fc04d6320e32ba7b86238ae1222a8bd75ac 9911 bytes (raw)

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
 
/* SPDX-License-Identifier: GPL-2.0-or-later
 * Copyright Red Hat
 * Author: David Gibson <david@gibson.dropbear.id.au>
 *
 * Tracking for logical "flows" of packets.
 */

#include <stdint.h>
#include <unistd.h>
#include <string.h>
#include <errno.h>
#include <arpa/inet.h>

#include "util.h"
#include "passt.h"
#include "siphash.h"
#include "inany.h"
#include "flow.h"
#include "flow_table.h"

const char *flow_type_str[] = {
	[FLOW_TYPE_NONE]	= "<none>",
	[FLOW_TCP]		= "TCP connection",
	[FLOW_TCP_SPLICE]	= "TCP connection (spliced)",
};
static_assert(ARRAY_SIZE(flow_type_str) == FLOW_NUM_TYPES,
	      "flow_type_str[] doesn't match enum flow_type");

const uint8_t flow_proto[] = {
	[FLOW_TCP]		= IPPROTO_TCP,
	[FLOW_TCP_SPLICE]	= IPPROTO_TCP,
};
static_assert(ARRAY_SIZE(flow_proto) == FLOW_NUM_TYPES,
	      "flow_proto[] doesn't match enum flow_type");

/* Global Flow Table */
unsigned flow_first_free;
union flow flowtab[FLOW_MAX];

/* Last time the flow timers ran */
static struct timespec flow_timer_run;

/** flow_log_ - Log flow-related message
 * @f:		flow the message is related to
 * @pri:	Log priority
 * @fmt:	Format string
 * @...:	printf-arguments
 */
void flow_log_(const struct flow_common *f, int pri, const char *fmt, ...)
{
	char msg[BUFSIZ];
	va_list args;

	va_start(args, fmt);
	(void)vsnprintf(msg, sizeof(msg), fmt, args);
	va_end(args);

	logmsg(pri, "Flow %u (%s): %s", flow_idx(f), FLOW_TYPE(f), msg);
}

/**
 * flow_new_dbg() - Print debug message for new flow
 * @f:		Common flow structure
 * @side:	Which side initiated the new flow
 */
void flow_new_dbg(const struct flow_common *f, unsigned side)
{
	char ebuf[INET6_ADDRSTRLEN], fbuf[INET6_ADDRSTRLEN];
	const struct flowside *fside = &f->side[side];

	flow_log_(f, LOG_DEBUG, "New %s from %s/%u: [%s]:%hu <-> [%s]:%hu",
		  flow_type_str[f->type], pif_name(fside->pif), side,
		  inet_ntop(AF_INET6, &fside->faddr, fbuf, sizeof(fbuf)),
		  fside->fport,
		  inet_ntop(AF_INET6, &fside->eaddr, ebuf, sizeof(ebuf)),
		  fside->eport);
}

/**
 * flow_fwd_dbg() - Print debug message for newly forwarded flow
 * @f:		Common flow structure
 * @side:	Which side was the flow forwarded to
 */
void flow_fwd_dbg(const struct flow_common *f, unsigned side)
{
	char ebuf[INET6_ADDRSTRLEN], fbuf[INET6_ADDRSTRLEN];
	const struct flowside *fside = &f->side[side];

	inet_ntop(AF_INET6, &fside->eaddr, ebuf, sizeof(ebuf));
	inet_ntop(AF_INET6, &fside->faddr, fbuf, sizeof(fbuf));

	flow_log_(f, LOG_DEBUG, "Forwarded to %s/%u: [%s]:%hu <-> [%s]:%hu",
		  pif_name(fside->pif), side,
		  inet_ntop(AF_INET6, &fside->faddr, fbuf, sizeof(fbuf)),
		  fside->fport,
		  inet_ntop(AF_INET6, &fside->eaddr, ebuf, sizeof(ebuf)),
		  fside->eport);
}

/** flowside_from_sock - Initialize flowside to match an existing socket
 * @fside:	flowside to initialize
 * @pif:	pif id of this flowside
 * @s:		socket
 * @fsa:	Local addr of @s as sockaddr_in or sockaddr_in6, or NULL
 * @esa:	Remote addr of @s as sockaddr_in or sockaddr_in6, or NULL
 *
 * If NULL is passed for either @fsa/@esa, we use getsockname()/getpeername() to
 * obtain the information from the @s.
 *
 * #syscalls getsockname getpeername
 */
int flowside_from_sock(struct flowside *fside, uint8_t pif, int s,
		       const void *fsa, const void *esa)
{
	struct sockaddr_storage sa;

	fside->pif = pif;

	if (!fsa) {
		socklen_t sl = sizeof(sa);
		if (getsockname(s, (struct sockaddr *)&sa, &sl) < 0)
			return -errno;
		fsa = &sa;
	}
	inany_from_sockaddr(&fside->faddr, &fside->fport,
			    (const struct sockaddr *)fsa);

	if (!esa) {
		socklen_t sl = sizeof(sa);
		if (getpeername(s, (struct sockaddr *)&sa, &sl) < 0)
			return -errno;
		esa = &sa;
	}
	inany_from_sockaddr(&fside->eaddr, &fside->eport,
			    (const struct sockaddr *)esa);

	return 0;
}

/**
 * DOC: Theory of Operation - allocation and freeing of flow entries
 *
 * Each flow takes a single slot in flowtab[].  Moving entries in that table
 * (which we used to do) is fiddly and possibly expensive: it requires updating
 * the hash table indexing flows, and may require updating epoll data which
 * references the flow by index.  However, we also want to keep the active
 * entries in the table compact where possible, because otherwise scanning
 * through the entire table becomes more expensive.  This describes the
 * compromise implemented below.
 *
 * Free blocks
 *    A "free block" is a contiguous sequence of unused (FLOW_TYPE_NONE) entries
 *    in flowtab.  The first entry in each block contains metadata, specifically
 *    the number of entries in the block, and the index of the next (non
 *    contiguous) free block (struct flow_free_block).
 *
 * Free block list
 *    flow_first_free gives the index of the first entry of the first (lowest
 *    index) free block.  Each free block has the index of the next free block,
 *    or MAX_FLOW if it is the last free block.  Together these form a linked
 *    list of free blocks, in strictly increasing order of index.
 *
 * Allocation
 *    When allocating a new flow, we always use the first entry of the first
 *    free block, that is, at index flow_first_free.  If the block has more than
 *    one entry, flow_first_free is updated to the next entry, which is updated
 *    to represent the new smaller free block.  Otherwise the free block is
 *    eliminated and flow_first_free is updated to the next free block.
 *
 * Scanning the table
 *    Theoretically, scanning the table requires FLOW_MAX iterations.  However,
 *    when we encounter the start of a free block, we can immediately skip to
 *    its end, meaning that in practice we only need (number of active
 *    connections) + (number of free blocks) iterations.
 *
 * Freeing
 *    We can only free entries when scanning the whole flow table in
 *    flow_defer_handler().  This is what lets us maintain the fee block list in
 *    index sorted order.  As we scan we keep track of whether the previous
 *    entry was in a free block or not.  If so when an entry is freed (its
 *    deferred handler returns 'true'), we add it to that free block.  Otherwise
 *    we create a new free block for it and chain it to the last free block we
 *    scanned.
 */

/**
 * flow_alloc() - Allocate a new flow
 *
 * Return: pointer to an unused flow entry, or NULL if the table is full
 */
union flow *flow_alloc(void)
{
	union flow *flow = &flowtab[flow_first_free];

	if (flow_first_free >= FLOW_MAX)
		return NULL;

	ASSERT(flow->f.type == FLOW_TYPE_NONE);
	ASSERT(flow->free.n >= 1);

	if (flow->free.n > 1) {
		/* Use one entry from the block */
		union flow *next = &flowtab[++flow_first_free];

		ASSERT(FLOW_IDX(next) < FLOW_MAX);
		ASSERT(next->f.type == FLOW_TYPE_NONE);
		ASSERT(next->free.n == 0);

		next->free.n = flow->free.n - 1;
		next->free.next = flow->free.next;
	} else {
		/* Use the entire block */
		flow_first_free = flow->free.next;
	}

	memset(flow, 0, sizeof(*flow));
	return flow;
}

/**
 * flow_alloc_cancel() - Free a newly allocated flow
 * @flow:	Flow to deallocate
 *
 * @flow must be the last flow allocated by flow_alloc()
 */
void flow_alloc_cancel(union flow *flow)
{
	ASSERT(flow_first_free > FLOW_IDX(flow));

	flow->f.type = FLOW_TYPE_NONE;
	/* Put it back in a length 1 free block, don't attempt to fully reverse
	 * flow_alloc()s steps.  This will get folded together the next time
	 * flow_defer_handler runs anyway() */
	flow->free.n = 1;
	flow->free.next = flow_first_free;
	flow_first_free = FLOW_IDX(flow);
}

/**
 * flow_hash() - Calculate hash value for one side of a flow
 * @c:		Execution context
 * @proto:	Protocol of this flow (IP L4 protocol number)
 * @fside:	Flowside
 *
 * Return: hash value
 */
uint64_t flow_hash(const struct ctx *c, uint8_t proto,
		   const struct flowside *fside)
{
	struct siphash_state state = SIPHASH_INIT(c->hash_secret);

	ASSERT(flowside_complete(fside));
	inany_siphash_feed(&state, &fside->faddr);
	inany_siphash_feed(&state, &fside->eaddr);
	return siphash_final(&state, 38, (uint64_t)proto << 40 |
			     (uint64_t)fside->pif << 32 |
			     fside->fport << 16 | fside->eport);
}

/**
 * flow_defer_handler() - Handler for per-flow deferred and timed tasks
 * @c:		Execution context
 * @now:	Current timestamp
 */
void flow_defer_handler(const struct ctx *c, const struct timespec *now)
{
	struct flow_free_block *free_head = NULL;
	unsigned *last_next = &flow_first_free;
	bool timer = false;
	unsigned idx;

	if (timespec_diff_ms(now, &flow_timer_run) >= FLOW_TIMER_INTERVAL) {
		timer = true;
		flow_timer_run = *now;
	}

	for (idx = 0; idx < FLOW_MAX; idx++) {
		union flow *flow = &flowtab[idx];
		bool closed = false;

		if (flow->f.type == FLOW_TYPE_NONE) {
			/* Start of a free block */
			free_head = &flow->free;
			*last_next = idx;
			last_next = &free_head->next;
			/* Skip the rest of the block */
			idx += free_head->n - 1;
			continue;
		}

		
		switch (flow->f.type) {
		case FLOW_TYPE_NONE:
			closed = true;
			break;
		case FLOW_TCP:
			closed = tcp_flow_defer(flow);
			break;
		case FLOW_TCP_SPLICE:
			closed = tcp_splice_flow_defer(flow);
			if (!closed && timer)
				tcp_splice_timer(c, flow);
			break;
		default:
			/* Assume other flow types don't need any handling */
			;
		}

		if (closed) {
			flow->f.type = FLOW_TYPE_NONE;

			if (free_head) {
				/* Add slot to current free block */
				ASSERT(idx == FLOW_IDX(free_head) + free_head->n);
				free_head->n++;
				flow->free.n = flow->free.next = 0;
			} else {
				/* Create new free block */
				free_head = &flow->free;
				free_head->n = 1;
				*last_next = idx;
				last_next = &free_head->next;
			}
		} else {
			free_head = NULL;
		}
	}

	*last_next = FLOW_MAX;
}

/**
 * flow_init() - Initialise flow related data structures
 */
void flow_init(void)
{
	/* Initial state is a single free block containing the whole table */
	flowtab[0].free.n = FLOW_MAX;
	flowtab[0].free.next = FLOW_MAX;
}

debug log:

solving 70036fc ...
found 70036fc in https://archives.passt.top/passt-dev/20231221070237.1422557-8-david@gibson.dropbear.id.au/
found 263633e in https://archives.passt.top/passt-dev/20231221070237.1422557-7-david@gibson.dropbear.id.au/
found bc8cfc6 in https://archives.passt.top/passt-dev/20231221070237.1422557-6-david@gibson.dropbear.id.au/
found b9c4a18 in https://archives.passt.top/passt-dev/20231221070237.1422557-4-david@gibson.dropbear.id.au/
found 421e6b5 in https://archives.passt.top/passt-dev/20231221061549.976358-14-david@gibson.dropbear.id.au/
found 7b99b58 in https://archives.passt.top/passt-dev/20231221061549.976358-13-david@gibson.dropbear.id.au/
found 8fbe512 in https://archives.passt.top/passt-dev/20231221061549.976358-12-david@gibson.dropbear.id.au/
found 294412a in https://archives.passt.top/passt-dev/20231221061549.976358-11-david@gibson.dropbear.id.au/ ||
	https://archives.passt.top/passt-dev/20231220070908.2506277-11-david@gibson.dropbear.id.au/
found 79c6ae6 in https://archives.passt.top/passt-dev/20231221061549.976358-10-david@gibson.dropbear.id.au/ ||
	https://archives.passt.top/passt-dev/20231220070908.2506277-10-david@gibson.dropbear.id.au/
found ef129db in https://archives.passt.top/passt-dev/20231221061549.976358-7-david@gibson.dropbear.id.au/ ||
	https://archives.passt.top/passt-dev/20231220070908.2506277-7-david@gibson.dropbear.id.au/
found 0a0402d in https://archives.passt.top/passt-dev/20231214021541.3925825-6-david@gibson.dropbear.id.au/ ||
	https://archives.passt.top/passt-dev/20231221061549.976358-6-david@gibson.dropbear.id.au/ ||
	https://archives.passt.top/passt-dev/20231220070908.2506277-6-david@gibson.dropbear.id.au/
found a1c0a34 in https://archives.passt.top/passt-dev/20231214021541.3925825-2-david@gibson.dropbear.id.au/ ||
	https://archives.passt.top/passt-dev/20231221061549.976358-2-david@gibson.dropbear.id.au/ ||
	https://archives.passt.top/passt-dev/20231220070908.2506277-2-david@gibson.dropbear.id.au/
found d24726d in https://passt.top/passt
preparing index
index prepared:
100644 d24726d0521a8f604b3cb8f51441b1408b4e2ff0	flow.c

applying [1/12] https://archives.passt.top/passt-dev/20231214021541.3925825-2-david@gibson.dropbear.id.au/
diff --git a/flow.c b/flow.c
index d24726d..a1c0a34 100644

Checking patch flow.c...
Applied patch flow.c cleanly.

skipping https://archives.passt.top/passt-dev/20231221061549.976358-2-david@gibson.dropbear.id.au/ for a1c0a34
skipping https://archives.passt.top/passt-dev/20231220070908.2506277-2-david@gibson.dropbear.id.au/ for a1c0a34
index at:
100644 a1c0a349a80a7112d31b16372caf0e82db4db19a	flow.c

applying [2/12] https://archives.passt.top/passt-dev/20231214021541.3925825-6-david@gibson.dropbear.id.au/
diff --git a/flow.c b/flow.c
index a1c0a34..0a0402d 100644

Checking patch flow.c...
Applied patch flow.c cleanly.

skipping https://archives.passt.top/passt-dev/20231221061549.976358-6-david@gibson.dropbear.id.au/ for 0a0402d
skipping https://archives.passt.top/passt-dev/20231220070908.2506277-6-david@gibson.dropbear.id.au/ for 0a0402d
index at:
100644 0a0402da89a8391f79333feb7c261f37dad2b434	flow.c

applying [3/12] https://archives.passt.top/passt-dev/20231221061549.976358-7-david@gibson.dropbear.id.au/
diff --git a/flow.c b/flow.c
index 0a0402d..ef129db 100644

Checking patch flow.c...
Applied patch flow.c cleanly.

skipping https://archives.passt.top/passt-dev/20231220070908.2506277-7-david@gibson.dropbear.id.au/ for ef129db
index at:
100644 ef129dbdf9c912fe5377dedfe08809f01e99490e	flow.c

applying [4/12] https://archives.passt.top/passt-dev/20231221061549.976358-10-david@gibson.dropbear.id.au/
diff --git a/flow.c b/flow.c
index ef129db..79c6ae6 100644

Checking patch flow.c...
Applied patch flow.c cleanly.

skipping https://archives.passt.top/passt-dev/20231220070908.2506277-10-david@gibson.dropbear.id.au/ for 79c6ae6
index at:
100644 79c6ae6924533cd5942452f2198494718f680286	flow.c

applying [5/12] https://archives.passt.top/passt-dev/20231221061549.976358-11-david@gibson.dropbear.id.au/
diff --git a/flow.c b/flow.c
index 79c6ae6..294412a 100644

Checking patch flow.c...
Applied patch flow.c cleanly.

skipping https://archives.passt.top/passt-dev/20231220070908.2506277-11-david@gibson.dropbear.id.au/ for 294412a
index at:
100644 294412a316af3f4260cc18036197e19d32203a48	flow.c

applying [6/12] https://archives.passt.top/passt-dev/20231221061549.976358-12-david@gibson.dropbear.id.au/
diff --git a/flow.c b/flow.c
index 294412a..8fbe512 100644


applying [7/12] https://archives.passt.top/passt-dev/20231221061549.976358-13-david@gibson.dropbear.id.au/
diff --git a/flow.c b/flow.c
index 8fbe512..7b99b58 100644


applying [8/12] https://archives.passt.top/passt-dev/20231221061549.976358-14-david@gibson.dropbear.id.au/
diff --git a/flow.c b/flow.c
index 7b99b58..421e6b5 100644


applying [9/12] https://archives.passt.top/passt-dev/20231221070237.1422557-4-david@gibson.dropbear.id.au/
diff --git a/flow.c b/flow.c
index 421e6b5..b9c4a18 100644


applying [10/12] https://archives.passt.top/passt-dev/20231221070237.1422557-6-david@gibson.dropbear.id.au/
diff --git a/flow.c b/flow.c
index b9c4a18..bc8cfc6 100644


applying [11/12] https://archives.passt.top/passt-dev/20231221070237.1422557-7-david@gibson.dropbear.id.au/
diff --git a/flow.c b/flow.c
index bc8cfc6..263633e 100644


applying [12/12] https://archives.passt.top/passt-dev/20231221070237.1422557-8-david@gibson.dropbear.id.au/
diff --git a/flow.c b/flow.c
index 263633e..70036fc 100644

Checking patch flow.c...
Applied patch flow.c cleanly.
Checking patch flow.c...
Applied patch flow.c cleanly.
5:266: trailing whitespace.
		
Checking patch flow.c...
Applied patch flow.c cleanly.
Checking patch flow.c...
Applied patch flow.c cleanly.
Checking patch flow.c...
Applied patch flow.c cleanly.
Checking patch flow.c...
Applied patch flow.c cleanly.
Checking patch flow.c...
Applied patch flow.c cleanly.
warning: 1 line adds whitespace errors.

index at:
100644 70036fc04d6320e32ba7b86238ae1222a8bd75ac	flow.c

Code repositories for project(s) associated with this public inbox

	https://passt.top/passt

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for IMAP folder(s).