xref: /linux/net/netfilter/nft_set_pipapo.h (revision 9e4e86a604dfd06402933467578c4b79f5412b2c)
1 // SPDX-License-Identifier: GPL-2.0-only
2 
3 #ifndef _NFT_SET_PIPAPO_H
4 
5 #include <linux/log2.h>
6 #include <net/ipv6.h>			/* For the maximum length of a field */
7 
8 /* Count of concatenated fields depends on count of 32-bit nftables registers */
9 #define NFT_PIPAPO_MAX_FIELDS		NFT_REG32_COUNT
10 
11 /* Restrict usage to multiple fields, make sure rbtree is used otherwise */
12 #define NFT_PIPAPO_MIN_FIELDS		2
13 
14 /* Largest supported field size */
15 #define NFT_PIPAPO_MAX_BYTES		(sizeof(struct in6_addr))
16 #define NFT_PIPAPO_MAX_BITS		(NFT_PIPAPO_MAX_BYTES * BITS_PER_BYTE)
17 
18 /* Bits to be grouped together in table buckets depending on set size */
19 #define NFT_PIPAPO_GROUP_BITS_INIT	NFT_PIPAPO_GROUP_BITS_SMALL_SET
20 #define NFT_PIPAPO_GROUP_BITS_SMALL_SET	8
21 #define NFT_PIPAPO_GROUP_BITS_LARGE_SET	4
22 #define NFT_PIPAPO_GROUP_BITS_ARE_8_OR_4				\
23 	BUILD_BUG_ON((NFT_PIPAPO_GROUP_BITS_SMALL_SET != 8) ||		\
24 		     (NFT_PIPAPO_GROUP_BITS_LARGE_SET != 4))
25 #define NFT_PIPAPO_GROUPS_PER_BYTE(f)	(BITS_PER_BYTE / (f)->bb)
26 
27 /* If a lookup table gets bigger than NFT_PIPAPO_LT_SIZE_HIGH, switch to the
28  * small group width, and switch to the big group width if the table gets
29  * smaller than NFT_PIPAPO_LT_SIZE_LOW.
30  *
31  * Picking 2MiB as threshold (for a single table) avoids as much as possible
32  * crossing page boundaries on most architectures (x86-64 and MIPS huge pages,
33  * ARMv7 supersections, POWER "large" pages, SPARC Level 1 regions, etc.), which
34  * keeps performance nice in case kvmalloc() gives us non-contiguous areas.
35  */
36 #define NFT_PIPAPO_LT_SIZE_THRESHOLD	(1 << 21)
37 #define NFT_PIPAPO_LT_SIZE_HYSTERESIS	(1 << 16)
38 #define NFT_PIPAPO_LT_SIZE_HIGH		NFT_PIPAPO_LT_SIZE_THRESHOLD
39 #define NFT_PIPAPO_LT_SIZE_LOW		NFT_PIPAPO_LT_SIZE_THRESHOLD -	\
40 					NFT_PIPAPO_LT_SIZE_HYSTERESIS
41 
42 /* Fields are padded to 32 bits in input registers */
43 #define NFT_PIPAPO_GROUPS_PADDED_SIZE(f)				\
44 	(round_up((f)->groups / NFT_PIPAPO_GROUPS_PER_BYTE(f), sizeof(u32)))
45 
46 /* Number of buckets given by 2 ^ n, with n bucket bits */
47 #define NFT_PIPAPO_BUCKETS(bb)		(1 << (bb))
48 
49 /* Each n-bit range maps to up to n * 2 rules */
50 #define NFT_PIPAPO_MAP_NBITS		(const_ilog2(NFT_PIPAPO_MAX_BITS * 2))
51 
52 /* Use the rest of mapping table buckets for rule indices, but it makes no sense
53  * to exceed 32 bits
54  */
55 #if BITS_PER_LONG == 64
56 #define NFT_PIPAPO_MAP_TOBITS		32
57 #else
58 #define NFT_PIPAPO_MAP_TOBITS		(BITS_PER_LONG - NFT_PIPAPO_MAP_NBITS)
59 #endif
60 
61 /* ...which gives us the highest allowed index for a rule */
62 #define NFT_PIPAPO_RULE0_MAX		((1UL << (NFT_PIPAPO_MAP_TOBITS - 1)) \
63 					- (1UL << NFT_PIPAPO_MAP_NBITS))
64 
65 /* Definitions for vectorised implementations */
66 #ifdef NFT_PIPAPO_ALIGN
67 #define NFT_PIPAPO_ALIGN_HEADROOM					\
68 	(NFT_PIPAPO_ALIGN - ARCH_KMALLOC_MINALIGN)
69 #define NFT_PIPAPO_LT_ALIGN(lt)		(PTR_ALIGN((lt), NFT_PIPAPO_ALIGN))
70 #else
71 #define NFT_PIPAPO_ALIGN_HEADROOM	0
72 #define NFT_PIPAPO_LT_ALIGN(lt)		(lt)
73 #endif /* NFT_PIPAPO_ALIGN */
74 
75 #define nft_pipapo_for_each_field(field, index, match)		\
76 	for ((field) = (match)->f, (index) = 0;			\
77 	     (index) < (match)->field_count;			\
78 	     (index)++, (field)++)
79 
80 /**
81  * union nft_pipapo_map_bucket - Bucket of mapping table
82  * @to:		First rule number (in next field) this rule maps to
83  * @n:		Number of rules (in next field) this rule maps to
84  * @e:		If there's no next field, pointer to element this rule maps to
85  */
86 union nft_pipapo_map_bucket {
87 	struct {
88 #if BITS_PER_LONG == 64
89 		static_assert(NFT_PIPAPO_MAP_TOBITS <= 32);
90 		u32 to;
91 
92 		static_assert(NFT_PIPAPO_MAP_NBITS <= 32);
93 		u32 n;
94 #else
95 		unsigned long to:NFT_PIPAPO_MAP_TOBITS;
96 		unsigned long  n:NFT_PIPAPO_MAP_NBITS;
97 #endif
98 	};
99 	struct nft_pipapo_elem *e;
100 };
101 
102 /**
103  * struct nft_pipapo_field - Lookup, mapping tables and related data for a field
104  * @rules:	Number of inserted rules
105  * @bsize:	Size of each bucket in lookup table, in longs
106  * @rules_alloc: Number of allocated rules, always >= rules
107  * @groups:	Amount of bit groups
108  * @bb:		Number of bits grouped together in lookup table buckets
109  * @lt:		Lookup table: 'groups' rows of buckets
110  * @mt:		Mapping table: one bucket per rule
111  */
112 struct nft_pipapo_field {
113 	unsigned int rules;
114 	unsigned int bsize;
115 	unsigned int rules_alloc;
116 	u8 groups;
117 	u8 bb;
118 	unsigned long *lt;
119 	union nft_pipapo_map_bucket *mt;
120 };
121 
122 /**
123  * struct nft_pipapo_scratch - percpu data used for lookup and matching
124  * @bh_lock:    PREEMPT_RT local spinlock
125  * @map_index:	Current working bitmap index, toggled between field matches
126  * @__map:	store partial matching results during lookup
127  */
128 struct nft_pipapo_scratch {
129 	local_lock_t bh_lock;
130 	u8 map_index;
131 	unsigned long __map[];
132 };
133 
134 /**
135  * struct nft_pipapo_match - Data used for lookup and matching
136  * @field_count:	Amount of fields in set
137  * @bsize_max:		Maximum lookup table bucket size of all fields, in longs
138  * @scratch:		Preallocated per-CPU maps for partial matching results
139  * @rcu:		Matching data is swapped on commits
140  * @f:			Fields, with lookup and mapping tables
141  */
142 struct nft_pipapo_match {
143 	u8 field_count;
144 	unsigned int bsize_max;
145 	struct nft_pipapo_scratch * __percpu *scratch;
146 	struct rcu_head rcu;
147 	struct nft_pipapo_field f[] __counted_by(field_count);
148 };
149 
150 /**
151  * struct nft_pipapo - Representation of a set
152  * @match:	Currently in-use matching data
153  * @clone:	Copy where pending insertions and deletions are kept
154  * @width:	Total bytes to be matched for one packet, including padding
155  * @last_gc:	Timestamp of last garbage collection run, jiffies
156  * @gc_head:	list of nft_trans_gc to queue up for mem reclaim
157  */
158 struct nft_pipapo {
159 	struct nft_pipapo_match __rcu *match;
160 	struct nft_pipapo_match *clone;
161 	int width;
162 	unsigned long last_gc;
163 	struct list_head gc_head;
164 };
165 
166 struct nft_pipapo_elem;
167 
168 /**
169  * struct nft_pipapo_elem - API-facing representation of single set element
170  * @priv:	element placeholder
171  * @ext:	nftables API extensions
172  */
173 struct nft_pipapo_elem {
174 	struct nft_elem_priv	priv;
175 	struct nft_set_ext	ext;
176 };
177 
178 int pipapo_refill(unsigned long *map, unsigned int len, unsigned int rules,
179 		  unsigned long *dst,
180 		  const union nft_pipapo_map_bucket *mt, bool match_only);
181 
182 /**
183  * pipapo_and_field_buckets_4bit() - Intersect 4-bit buckets
184  * @f:		Field including lookup table
185  * @dst:	Area to store result
186  * @data:	Input data selecting table buckets
187  */
pipapo_and_field_buckets_4bit(const struct nft_pipapo_field * f,unsigned long * dst,const u8 * data)188 static inline void pipapo_and_field_buckets_4bit(const struct nft_pipapo_field *f,
189 						 unsigned long *dst,
190 						 const u8 *data)
191 {
192 	unsigned long *lt = NFT_PIPAPO_LT_ALIGN(f->lt);
193 	int group;
194 
195 	for (group = 0; group < f->groups; group += BITS_PER_BYTE / 4, data++) {
196 		u8 v;
197 
198 		v = *data >> 4;
199 		__bitmap_and(dst, dst, lt + v * f->bsize,
200 			     f->bsize * BITS_PER_LONG);
201 		lt += f->bsize * NFT_PIPAPO_BUCKETS(4);
202 
203 		v = *data & 0x0f;
204 		__bitmap_and(dst, dst, lt + v * f->bsize,
205 			     f->bsize * BITS_PER_LONG);
206 		lt += f->bsize * NFT_PIPAPO_BUCKETS(4);
207 	}
208 }
209 
210 /**
211  * pipapo_and_field_buckets_8bit() - Intersect 8-bit buckets
212  * @f:		Field including lookup table
213  * @dst:	Area to store result
214  * @data:	Input data selecting table buckets
215  */
pipapo_and_field_buckets_8bit(const struct nft_pipapo_field * f,unsigned long * dst,const u8 * data)216 static inline void pipapo_and_field_buckets_8bit(const struct nft_pipapo_field *f,
217 						 unsigned long *dst,
218 						 const u8 *data)
219 {
220 	unsigned long *lt = NFT_PIPAPO_LT_ALIGN(f->lt);
221 	int group;
222 
223 	for (group = 0; group < f->groups; group++, data++) {
224 		__bitmap_and(dst, dst, lt + *data * f->bsize,
225 			     f->bsize * BITS_PER_LONG);
226 		lt += f->bsize * NFT_PIPAPO_BUCKETS(8);
227 	}
228 }
229 
230 /**
231  * pipapo_estimate_size() - Estimate worst-case for set size
232  * @desc:	Set description, element count and field description used here
233  *
234  * The size for this set type can vary dramatically, as it depends on the number
235  * of rules (composing netmasks) the entries expand to. We compute the worst
236  * case here.
237  *
238  * In general, for a non-ranged entry or a single composing netmask, we need
239  * one bit in each of the sixteen NFT_PIPAPO_BUCKETS, for each 4-bit group (that
240  * is, each input bit needs four bits of matching data), plus a bucket in the
241  * mapping table for each field.
242  *
243  * Return: worst-case set size in bytes, 0 on any overflow
244  */
pipapo_estimate_size(const struct nft_set_desc * desc)245 static u64 pipapo_estimate_size(const struct nft_set_desc *desc)
246 {
247 	unsigned long entry_size;
248 	u64 size;
249 	int i;
250 
251 	for (i = 0, entry_size = 0; i < desc->field_count; i++) {
252 		unsigned long rules;
253 
254 		if (desc->field_len[i] > NFT_PIPAPO_MAX_BYTES)
255 			return 0;
256 
257 		/* Worst-case ranges for each concatenated field: each n-bit
258 		 * field can expand to up to n * 2 rules in each bucket, and
259 		 * each rule also needs a mapping bucket.
260 		 */
261 		rules = ilog2(desc->field_len[i] * BITS_PER_BYTE) * 2;
262 		entry_size += rules *
263 			      NFT_PIPAPO_BUCKETS(NFT_PIPAPO_GROUP_BITS_INIT) /
264 			      BITS_PER_BYTE;
265 		entry_size += rules * sizeof(union nft_pipapo_map_bucket);
266 	}
267 
268 	/* Rules in lookup and mapping tables are needed for each entry */
269 	size = desc->size * entry_size;
270 	if (size && div_u64(size, desc->size) != entry_size)
271 		return 0;
272 
273 	size += sizeof(struct nft_pipapo) + sizeof(struct nft_pipapo_match) * 2;
274 
275 	size += sizeof(struct nft_pipapo_field) * desc->field_count;
276 
277 	return size;
278 }
279 
280 /**
281  * pipapo_resmap_init() - Initialise result map before first use
282  * @m:		Matching data, including mapping table
283  * @res_map:	Result map
284  *
285  * Initialize all bits covered by the first field to one, so that after
286  * the first step, only the matching bits of the first bit group remain.
287  *
288  * If other fields have a large bitmap, set remainder of res_map to 0.
289  */
pipapo_resmap_init(const struct nft_pipapo_match * m,unsigned long * res_map)290 static inline void pipapo_resmap_init(const struct nft_pipapo_match *m, unsigned long *res_map)
291 {
292 	const struct nft_pipapo_field *f = m->f;
293 	int i;
294 
295 	for (i = 0; i < f->bsize; i++)
296 		res_map[i] = ULONG_MAX;
297 
298 	for (i = f->bsize; i < m->bsize_max; i++)
299 		res_map[i] = 0ul;
300 }
301 #endif /* _NFT_SET_PIPAPO_H */
302