/* SPDX-License-Identifier: GPL-2.0-or-later
 * Copyright Red Hat
 * Author: David Gibson
 *
 * Tracking for logical "flows" of packets.
 *
 * This header relies on the following names being visible wherever the macros
 * that use them are expanded; none of them is defined here:
 *   - MAX_FROM_BITS()		(used by FLOW_MAX)
 *   - LOG_DEBUG, LOG_ERR	(used by flow_dbg(), flow_err())
 *   - log_trace		(used by flow_trace())
 */
#ifndef FLOW_H
#define FLOW_H

/* Standard headers for the types and macros used directly below, so that this
 * header doesn't depend on what the including file happened to pull in first.
 */
#include <assert.h>	/* static_assert */
#include <stdbool.h>	/* bool */
#include <stdint.h>	/* uint8_t, uint32_t */

/* Only pointers to these are needed here.  Declaring the tags at file scope
 * stops the prototypes below from introducing new, prototype-scoped struct
 * types when the full definitions haven't been seen yet.
 */
struct ctx;
struct timespec;

#define FLOW_TIMER_INTERVAL		1000	/* ms */

/**
 * enum flow_state - States of a flow table entry
 *
 * An individual flow table entry moves through these states, usually in this
 * order.
 *  General rules:
 *    - Code outside flow.c should never write common fields of union flow.
 *    - The state field may always be read.
 *
 *    FREE - Part of the general pool of free flow table entries
 *        Operations:
 *            - flow_alloc() finds an entry and moves it to NEW state
 *
 *    NEW - Freshly allocated, uninitialised entry
 *        Operations:
 *            - flow_alloc_cancel() returns the entry to FREE state
 *            - flow_initiate() sets the entry's INISIDE details and moves to
 *              INI state
 *            - FLOW_SET_TYPE() sets the entry's type and moves to TYPED state
 *        Caveats:
 *            - No fields other than state may be accessed.
 *            - At most one entry may be in NEW, INI, FWD or TYPED state at a
 *              time, so it's unsafe to use flow_alloc() again until this entry
 *              moves to ACTIVE or FREE state
 *            - You may not return to the main epoll loop while an entry is in
 *              NEW state.
 *
 *    INI - An entry with INISIDE common information completed
 *        Operations:
 *            - Common fields related to INISIDE may be read
 *            - flow_alloc_cancel() returns the entry to FREE state
 *            - flow_forward() sets the entry's FWDSIDE details and moves to FWD
 *              state
 *        Caveats:
 *            - Other common fields may not be read
 *            - Type specific fields may not be read or written
 *            - At most one entry may be in NEW, INI, FWD or TYPED state at a
 *              time, so it's unsafe to use flow_alloc() again until this entry
 *              moves to ACTIVE or FREE state
 *            - You may not return to the main epoll loop while an entry is in
 *              INI state.
 *
 *    FWD - An entry with only INISIDE and FWDSIDE common information completed
 *        Operations:
 *            - Common fields related to INISIDE & FWDSIDE may be read
 *            - flow_alloc_cancel() returns the entry to FREE state
 *            - FLOW_SET_TYPE() sets the entry's type and moves to TYPED state
 *        Caveats:
 *            - Other common fields may not be read
 *            - Type specific fields may not be read or written
 *            - At most one entry may be in NEW, INI, FWD or TYPED state at a
 *              time, so it's unsafe to use flow_alloc() again until this entry
 *              moves to ACTIVE or FREE state
 *            - You may not return to the main epoll loop while an entry is in
 *              FWD state.
 *
 *    TYPED - Generic info initialised, type specific initialisation underway
 *        Operations:
 *            - All common fields may be read
 *            - Type specific fields may be read and written
 *            - flow_alloc_cancel() returns the entry to FREE state
 *            - FLOW_ACTIVATE() moves the entry to ACTIVE state
 *        Caveats:
 *            - At most one entry may be in NEW, INI, FWD or TYPED state at a
 *              time, so it's unsafe to use flow_alloc() again until this entry
 *              moves to ACTIVE or FREE state
 *            - You may not return to the main epoll loop while an entry is in
 *              TYPED state.
 *
 *    ACTIVE - An active, fully-initialised flow entry
 *        Operations:
 *            - All common fields may be read
 *            - Type specific fields may be read and written
 *            - Flow may be expired by returning 'true' from flow type specific
 *              deferred or timer handler.  This will return it to FREE state.
 *        Caveats:
 *            - flow_alloc_cancel() may not be called on it
 */
enum flow_state {
	FLOW_STATE_FREE,
	FLOW_STATE_NEW,
	FLOW_STATE_INI,
	FLOW_STATE_FWD,
	FLOW_STATE_TYPED,
	FLOW_STATE_ACTIVE,

	FLOW_NUM_STATES,
};

/* Names of the flow states, indexed by enum flow_state (defined elsewhere) */
extern const char *flow_state_str[];

/**
 * FLOW_STATE() - Printable name for the state of a flow table entry
 * @f:		Pointer to a structure with a 'state' member
 *
 * Evaluates to flow_state_str[state], or "?" if state is out of range.
 * Note that @f is evaluated more than once.
 */
#define FLOW_STATE(f)							\
	((f)->state < FLOW_NUM_STATES ? flow_state_str[(f)->state] : "?")

/**
 * enum flow_type - Different types of packet flows we track
 */
enum flow_type {
	/* Represents an invalid or unused flow */
	FLOW_TYPE_NONE = 0,
	/* A TCP connection between a socket and tap interface */
	FLOW_TCP,
	/* A TCP connection between a host socket and ns socket */
	FLOW_TCP_SPLICE,
	/* ICMP echo requests from guest to host and matching replies back */
	FLOW_PING4,
	/* ICMPv6 echo requests from guest to host and matching replies back */
	FLOW_PING6,

	FLOW_NUM_TYPES,
};

/* Names of the flow types, indexed by enum flow_type (defined elsewhere) */
extern const char *flow_type_str[];

/**
 * FLOW_TYPE() - Printable name for the type of a flow
 * @f:		Pointer to a structure with a 'type' member
 *
 * Evaluates to flow_type_str[type], or "?" if type is out of range.
 * Note that @f is evaluated more than once.
 */
#define FLOW_TYPE(f)							\
	((f)->type < FLOW_NUM_TYPES ? flow_type_str[(f)->type] : "?")

/* Per-type protocol values, indexed by enum flow_type (defined elsewhere) */
extern const uint8_t flow_proto[];

/**
 * FLOW_PROTO() - Protocol value for the type of a flow
 * @f:		Pointer to a structure with a 'type' member
 *
 * Evaluates to flow_proto[type], or 0 if type is out of range.
 * Note that @f is evaluated more than once.
 */
#define FLOW_PROTO(f)							\
	((f)->type < FLOW_NUM_TYPES ? flow_proto[(f)->type] : 0)

#define SIDES			2

#define INISIDE			0	/* Initiating side */
#define FWDSIDE			1	/* Forwarded side */

/**
 * struct flow_common - Common fields for packet flows
 * @state:	State of the flow table entry
 * @type:	Type of packet flow
 * @pif[]:	Interface for each side of the flow
 */
struct flow_common {
	uint8_t		state;
	uint8_t		type;
	uint8_t		pif[SIDES];
};

#define FLOW_INDEX_BITS		17	/* 128k - 1 */
#define FLOW_MAX		MAX_FROM_BITS(FLOW_INDEX_BITS)

#define FLOW_TABLE_PRESSURE	30	/* % of FLOW_MAX */
#define FLOW_FILE_PRESSURE	30	/* % of c->nofile */

/**
 * struct flow_sidx - ID for one side of a specific flow
 * @side:	Side referenced (0 or 1)
 * @flow:	Index of flow referenced
 */
typedef struct flow_sidx {
	unsigned	side :1;
	unsigned	flow :FLOW_INDEX_BITS;
} flow_sidx_t;
static_assert(sizeof(flow_sidx_t) <= sizeof(uint32_t),
	      "flow_sidx_t must fit within 32 bits");

/* sidx value with flow index FLOW_MAX; the side member is left as zero */
#define FLOW_SIDX_NONE ((flow_sidx_t){ .flow = FLOW_MAX })

/**
 * flow_sidx_eq() - Test if two sidx values are equal
 * @a, @b:	sidx values
 *
 * Return: true iff @a and @b refer to the same side of the same flow
 */
static inline bool flow_sidx_eq(flow_sidx_t a, flow_sidx_t b)
{
	return (a.flow == b.flow) && (a.side == b.side);
}

union flow;

void flow_init(void);
void flow_defer_handler(const struct ctx *c, const struct timespec *now);

/**
 * flow_log_() - Log a printf()-style message about a flow
 * @f:		Common fields of the flow the message is about
 * @pri:	Priority of the message
 * @fmt:	printf()-style format string, checked against the arguments
 * @...:	Arguments for @fmt
 */
void flow_log_(const struct flow_common *f, int pri, const char *fmt, ...)
	__attribute__((format(printf, 3, 4)));

/**
 * flow_log() - Log a message about a flow, given any flow structure
 * @f_:		Pointer to a structure whose 'f' member is the flow's
 *		struct flow_common
 * @pri:	Priority of the message
 * @...:	printf()-style format string and arguments
 */
#define flow_log(f_, pri, ...)	flow_log_(&(f_)->f, (pri), __VA_ARGS__)

/* Shortcuts for flow_log() at fixed priorities */
#define flow_dbg(f, ...)	flow_log((f), LOG_DEBUG, __VA_ARGS__)
#define flow_err(f, ...)	flow_log((f), LOG_ERR, __VA_ARGS__)

/**
 * flow_trace() - Log at debug priority, but only if log_trace is set
 * @f:		As for flow_log()
 * @...:	printf()-style format string and arguments
 */
#define flow_trace(f, ...)						\
	do {								\
		if (log_trace)						\
			flow_dbg((f), __VA_ARGS__);			\
	} while (0)

#endif /* FLOW_H */