xref: /linux/net/netfilter/nf_conntrack_expect.c (revision 87320be9f0d24fce67631b7eef919f0b79c3e45c)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /* Expectation handling for nf_conntrack. */
3 
4 /* (C) 1999-2001 Paul `Rusty' Russell
5  * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
6  * (C) 2003,2004 USAGI/WIDE Project <http://www.linux-ipv6.org>
7  * (c) 2005-2012 Patrick McHardy <kaber@trash.net>
8  */
9 
10 #include <linux/types.h>
11 #include <linux/netfilter.h>
12 #include <linux/skbuff.h>
13 #include <linux/proc_fs.h>
14 #include <linux/seq_file.h>
15 #include <linux/stddef.h>
16 #include <linux/slab.h>
17 #include <linux/err.h>
18 #include <linux/percpu.h>
19 #include <linux/kernel.h>
20 #include <linux/siphash.h>
21 #include <linux/moduleparam.h>
22 #include <linux/export.h>
23 #include <net/net_namespace.h>
24 #include <net/netns/hash.h>
25 
26 #include <net/netfilter/nf_conntrack.h>
27 #include <net/netfilter/nf_conntrack_core.h>
28 #include <net/netfilter/nf_conntrack_ecache.h>
29 #include <net/netfilter/nf_conntrack_expect.h>
30 #include <net/netfilter/nf_conntrack_helper.h>
31 #include <net/netfilter/nf_conntrack_l4proto.h>
32 #include <net/netfilter/nf_conntrack_tuple.h>
33 #include <net/netfilter/nf_conntrack_zones.h>
34 
/* Number of buckets in the expectation hash table. */
unsigned int nf_ct_expect_hsize __read_mostly;
EXPORT_SYMBOL_GPL(nf_ct_expect_hsize);

/* Expectation hash table; buckets are selected by nf_ct_expect_dst_hash(). */
struct hlist_head *nf_ct_expect_hash __read_mostly;
EXPORT_SYMBOL_GPL(nf_ct_expect_hash);

/* Limit compared against the per-netns expect_count before inserting. */
unsigned int nf_ct_expect_max __read_mostly;

/* Slab cache from which struct nf_conntrack_expect objects are allocated. */
static struct kmem_cache *nf_ct_expect_cachep __read_mostly;
/* siphash key for the expectation hash; seeded via get_random_once(). */
static siphash_aligned_key_t nf_ct_expect_hashrnd;
45 
nf_ct_expectation_gc(struct nf_conn_help * master_help)46 void nf_ct_expectation_gc(struct nf_conn_help *master_help)
47 {
48 	struct nf_conntrack_expect *exp;
49 	struct hlist_node *next;
50 
51 	if (hlist_empty(&master_help->expectations))
52 		return;
53 
54 	spin_lock_bh(&nf_conntrack_expect_lock);
55 	hlist_for_each_entry_safe(exp, next, &master_help->expectations, lnode) {
56 		if (!nf_ct_exp_is_expired(exp))
57 			continue;
58 
59 		nf_ct_unlink_expect(exp);
60 	}
61 	spin_unlock_bh(&nf_conntrack_expect_lock);
62 }
63 
64 /* nf_conntrack_expect helper functions */
nf_ct_unlink_expect_report(struct nf_conntrack_expect * exp,u32 portid,int report)65 void nf_ct_unlink_expect_report(struct nf_conntrack_expect *exp,
66 				u32 portid, int report)
67 {
68 	struct nf_conn_help *master_help = nfct_help(exp->master);
69 	struct net *net = nf_ct_exp_net(exp);
70 	struct nf_conntrack_net *cnet;
71 
72 	lockdep_nfct_expect_lock_held();
73 
74 	hlist_del_rcu(&exp->hnode);
75 
76 	cnet = nf_ct_pernet(net);
77 	cnet->expect_count--;
78 
79 	hlist_del_rcu(&exp->lnode);
80 	if (master_help)
81 		master_help->expecting[exp->class]--;
82 
83 	nf_ct_expect_event_report(IPEXP_DESTROY, exp, portid, report);
84 	nf_ct_expect_put(exp);
85 
86 	NF_CT_STAT_INC(net, expect_delete);
87 }
88 EXPORT_SYMBOL_GPL(nf_ct_unlink_expect_report);
89 
nf_ct_expect_dst_hash(const struct net * n,const struct nf_conntrack_tuple * tuple)90 static unsigned int nf_ct_expect_dst_hash(const struct net *n, const struct nf_conntrack_tuple *tuple)
91 {
92 	struct {
93 		union nf_inet_addr dst_addr;
94 		u32 net_mix;
95 		u16 dport;
96 		u8 l3num;
97 		u8 protonum;
98 	} __aligned(SIPHASH_ALIGNMENT) combined;
99 	u32 hash;
100 
101 	get_random_once(&nf_ct_expect_hashrnd, sizeof(nf_ct_expect_hashrnd));
102 
103 	memset(&combined, 0, sizeof(combined));
104 
105 	combined.dst_addr = tuple->dst.u3;
106 	combined.net_mix = net_hash_mix(n);
107 	combined.dport = (__force __u16)tuple->dst.u.all;
108 	combined.l3num = tuple->src.l3num;
109 	combined.protonum = tuple->dst.protonum;
110 
111 	hash = siphash(&combined, sizeof(combined), &nf_ct_expect_hashrnd);
112 
113 	return reciprocal_scale(hash, nf_ct_expect_hsize);
114 }
115 
116 static bool
nf_ct_exp_equal(const struct nf_conntrack_tuple * tuple,const struct nf_conntrack_expect * i,const struct nf_conntrack_zone * zone,const struct net * net)117 nf_ct_exp_equal(const struct nf_conntrack_tuple *tuple,
118 		const struct nf_conntrack_expect *i,
119 		const struct nf_conntrack_zone *zone,
120 		const struct net *net)
121 {
122 	return nf_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask) &&
123 	       net_eq(net, read_pnet(&i->net)) &&
124 	       nf_ct_exp_zone_equal_any(i, zone);
125 }
126 
127 struct nf_conntrack_expect *
__nf_ct_expect_find(struct net * net,const struct nf_conntrack_zone * zone,const struct nf_conntrack_tuple * tuple)128 __nf_ct_expect_find(struct net *net,
129 		    const struct nf_conntrack_zone *zone,
130 		    const struct nf_conntrack_tuple *tuple)
131 {
132 	struct nf_conntrack_net *cnet = nf_ct_pernet(net);
133 	struct nf_conntrack_expect *i;
134 	unsigned int h;
135 
136 	if (!cnet->expect_count)
137 		return NULL;
138 
139 	h = nf_ct_expect_dst_hash(net, tuple);
140 	hlist_for_each_entry_rcu(i, &nf_ct_expect_hash[h], hnode) {
141 		if (nf_ct_exp_is_expired(i))
142 			continue;
143 		if (nf_ct_exp_equal(tuple, i, zone, net))
144 			return i;
145 	}
146 	return NULL;
147 }
148 EXPORT_SYMBOL_GPL(__nf_ct_expect_find);
149 
150 /* Just find a expectation corresponding to a tuple. */
151 struct nf_conntrack_expect *
nf_ct_expect_find_get(struct net * net,const struct nf_conntrack_zone * zone,const struct nf_conntrack_tuple * tuple)152 nf_ct_expect_find_get(struct net *net,
153 		      const struct nf_conntrack_zone *zone,
154 		      const struct nf_conntrack_tuple *tuple)
155 {
156 	struct nf_conntrack_expect *i;
157 
158 	rcu_read_lock();
159 	i = __nf_ct_expect_find(net, zone, tuple);
160 	if (i && !refcount_inc_not_zero(&i->use))
161 		i = NULL;
162 	rcu_read_unlock();
163 
164 	return i;
165 }
166 EXPORT_SYMBOL_GPL(nf_ct_expect_find_get);
167 
168 /* If an expectation for this connection is found, it gets delete from
169  * global list then returned. */
170 struct nf_conntrack_expect *
nf_ct_find_expectation(struct net * net,const struct nf_conntrack_zone * zone,const struct nf_conntrack_tuple * tuple,bool unlink)171 nf_ct_find_expectation(struct net *net,
172 		       const struct nf_conntrack_zone *zone,
173 		       const struct nf_conntrack_tuple *tuple, bool unlink)
174 {
175 	struct nf_conntrack_net *cnet = nf_ct_pernet(net);
176 	struct nf_conntrack_expect *i, *exp = NULL;
177 	struct hlist_node *next;
178 	unsigned int h;
179 
180 	lockdep_nfct_expect_lock_held();
181 
182 	if (!cnet->expect_count)
183 		return NULL;
184 
185 	h = nf_ct_expect_dst_hash(net, tuple);
186 	hlist_for_each_entry_safe(i, next, &nf_ct_expect_hash[h], hnode) {
187 		if (nf_ct_exp_is_expired(i)) {
188 			nf_ct_unlink_expect(i);
189 			continue;
190 		}
191 		if (!(i->flags & NF_CT_EXPECT_INACTIVE) &&
192 		    nf_ct_exp_equal(tuple, i, zone, net)) {
193 			exp = i;
194 			break;
195 		}
196 	}
197 	if (!exp)
198 		return NULL;
199 
200 	if (!refcount_inc_not_zero(&exp->use))
201 		return NULL;
202 
203 	/* If master is not in hash table yet (ie. packet hasn't left
204 	   this machine yet), how can other end know about expected?
205 	   Hence these are not the droids you are looking for (if
206 	   master ct never got confirmed, we'd hold a reference to it
207 	   and weird things would happen to future packets). */
208 	if (!nf_ct_is_confirmed(exp->master))
209 		goto err_release_exp;
210 
211 	/* Avoid race with other CPUs, that for exp->master ct, is
212 	 * about to invoke ->destroy(), or nf_ct_delete() via timeout
213 	 * or early_drop().
214 	 *
215 	 * The refcount_inc_not_zero() check tells:  If that fails, we
216 	 * know that the ct is being destroyed.  If it succeeds, we
217 	 * can be sure the ct cannot disappear underneath.
218 	 */
219 	if (unlikely(nf_ct_is_dying(exp->master) ||
220 		     !refcount_inc_not_zero(&exp->master->ct_general.use)))
221 		goto err_release_exp;
222 
223 	if (exp->flags & NF_CT_EXPECT_PERMANENT || !unlink)
224 		return exp;
225 
226 	nf_ct_unlink_expect(exp);
227 
228 	return exp;
229 
230 err_release_exp:
231 	nf_ct_expect_put(exp);
232 	return NULL;
233 }
234 
235 /* delete all expectations for this conntrack */
nf_ct_remove_expectations(struct nf_conn * ct)236 void nf_ct_remove_expectations(struct nf_conn *ct)
237 {
238 	struct nf_conn_help *help = nfct_help(ct);
239 	struct nf_conntrack_expect *exp;
240 	struct hlist_node *next;
241 
242 	/* Optimization: most connection never expect any others. */
243 	if (!help)
244 		return;
245 
246 	spin_lock_bh(&nf_conntrack_expect_lock);
247 	hlist_for_each_entry_safe(exp, next, &help->expectations, lnode)
248 		nf_ct_unlink_expect(exp);
249 	spin_unlock_bh(&nf_conntrack_expect_lock);
250 }
251 EXPORT_SYMBOL_GPL(nf_ct_remove_expectations);
252 
253 /* Would two expected things clash? */
expect_clash(const struct nf_conntrack_expect * a,const struct nf_conntrack_expect * b)254 static inline int expect_clash(const struct nf_conntrack_expect *a,
255 			       const struct nf_conntrack_expect *b)
256 {
257 	/* Part covered by intersection of masks must be unequal,
258 	   otherwise they clash */
259 	struct nf_conntrack_tuple_mask intersect_mask;
260 	int count;
261 
262 	intersect_mask.src.u.all = a->mask.src.u.all & b->mask.src.u.all;
263 
264 	for (count = 0; count < NF_CT_TUPLE_L3SIZE; count++){
265 		intersect_mask.src.u3.all[count] =
266 			a->mask.src.u3.all[count] & b->mask.src.u3.all[count];
267 	}
268 
269 	return nf_ct_tuple_mask_cmp(&a->tuple, &b->tuple, &intersect_mask) &&
270 	       net_eq(nf_ct_net(a->master), nf_ct_net(b->master)) &&
271 	       nf_ct_zone_equal_any(a->master, nf_ct_zone(b->master));
272 }
273 
expect_matches(const struct nf_conntrack_expect * a,const struct nf_conntrack_expect * b)274 static inline int expect_matches(const struct nf_conntrack_expect *a,
275 				 const struct nf_conntrack_expect *b)
276 {
277 	return nf_ct_tuple_equal(&a->tuple, &b->tuple) &&
278 	       nf_ct_tuple_mask_equal(&a->mask, &b->mask) &&
279 	       net_eq(nf_ct_net(a->master), nf_ct_net(b->master)) &&
280 	       nf_ct_zone_equal_any(a->master, nf_ct_zone(b->master));
281 }
282 
master_matches(const struct nf_conntrack_expect * a,const struct nf_conntrack_expect * b,unsigned int flags)283 static bool master_matches(const struct nf_conntrack_expect *a,
284 			   const struct nf_conntrack_expect *b,
285 			   unsigned int flags)
286 {
287 	if (flags & NF_CT_EXP_F_SKIP_MASTER)
288 		return true;
289 
290 	return a->master == b->master;
291 }
292 
293 /* Generally a bad idea to call this: could have matched already. */
nf_ct_unexpect_related(struct nf_conntrack_expect * exp)294 void nf_ct_unexpect_related(struct nf_conntrack_expect *exp)
295 {
296 	spin_lock_bh(&nf_conntrack_expect_lock);
297 	WRITE_ONCE(exp->flags, exp->flags | NF_CT_EXPECT_DEAD);
298 	spin_unlock_bh(&nf_conntrack_expect_lock);
299 }
300 EXPORT_SYMBOL_GPL(nf_ct_unexpect_related);
301 
302 /* We don't increase the master conntrack refcount for non-fulfilled
303  * conntracks. During the conntrack destruction, the expectations are
304  * always killed before the conntrack itself */
nf_ct_expect_alloc(struct nf_conn * me)305 struct nf_conntrack_expect *nf_ct_expect_alloc(struct nf_conn *me)
306 {
307 	struct nf_conntrack_expect *new;
308 
309 	new = kmem_cache_zalloc(nf_ct_expect_cachep, GFP_ATOMIC);
310 	if (!new)
311 		return NULL;
312 
313 	new->timeout = nfct_time_stamp;
314 	new->master = me;
315 	refcount_set(&new->use, 1);
316 	return new;
317 }
318 EXPORT_SYMBOL_GPL(nf_ct_expect_alloc);
319 
320 /* This function can only be used from packet path, where accessing
321  * master's helper is safe, because the packet holds a reference on
322  * the conntrack object. Never use it from control plane.
323  */
nf_ct_expect_init(struct nf_conntrack_expect * exp,unsigned int class,u_int8_t family,const union nf_inet_addr * saddr,const union nf_inet_addr * daddr,u_int8_t proto,const __be16 * src,const __be16 * dst)324 void nf_ct_expect_init(struct nf_conntrack_expect *exp, unsigned int class,
325 		       u_int8_t family,
326 		       const union nf_inet_addr *saddr,
327 		       const union nf_inet_addr *daddr,
328 		       u_int8_t proto, const __be16 *src, const __be16 *dst)
329 {
330 	struct nf_conntrack_helper *helper = NULL;
331 	struct nf_conn *ct = exp->master;
332 	struct net *net = read_pnet(&ct->ct_net);
333 	struct nf_conn_help *help;
334 	int len;
335 
336 	if (family == AF_INET)
337 		len = 4;
338 	else
339 		len = 16;
340 
341 	exp->flags = 0;
342 	exp->class = class;
343 	exp->expectfn = NULL;
344 
345 	help = nfct_help(ct);
346 	if (help)
347 		helper = rcu_dereference(help->helper);
348 
349 	rcu_assign_pointer(exp->helper, helper);
350 	rcu_assign_pointer(exp->assign_helper, NULL);
351 	write_pnet(&exp->net, net);
352 #ifdef CONFIG_NF_CONNTRACK_ZONES
353 	exp->zone = ct->zone;
354 #endif
355 	exp->tuple.src.l3num = family;
356 	exp->tuple.dst.protonum = proto;
357 
358 	exp->master_tuple = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
359 
360 	if (saddr) {
361 		memcpy(&exp->tuple.src.u3, saddr, len);
362 		if (sizeof(exp->tuple.src.u3) > len)
363 			/* address needs to be cleared for nf_ct_tuple_equal */
364 			memset((void *)&exp->tuple.src.u3 + len, 0x00,
365 			       sizeof(exp->tuple.src.u3) - len);
366 		memset(&exp->mask.src.u3, 0xFF, len);
367 		if (sizeof(exp->mask.src.u3) > len)
368 			memset((void *)&exp->mask.src.u3 + len, 0x00,
369 			       sizeof(exp->mask.src.u3) - len);
370 	} else {
371 		memset(&exp->tuple.src.u3, 0x00, sizeof(exp->tuple.src.u3));
372 		memset(&exp->mask.src.u3, 0x00, sizeof(exp->mask.src.u3));
373 	}
374 
375 	if (src) {
376 		exp->tuple.src.u.all = *src;
377 		exp->mask.src.u.all = htons(0xFFFF);
378 	} else {
379 		exp->tuple.src.u.all = 0;
380 		exp->mask.src.u.all = 0;
381 	}
382 
383 	memcpy(&exp->tuple.dst.u3, daddr, len);
384 	if (sizeof(exp->tuple.dst.u3) > len)
385 		/* address needs to be cleared for nf_ct_tuple_equal */
386 		memset((void *)&exp->tuple.dst.u3 + len, 0x00,
387 		       sizeof(exp->tuple.dst.u3) - len);
388 
389 	exp->tuple.dst.u.all = *dst;
390 
391 #if IS_ENABLED(CONFIG_NF_NAT)
392 	memset(&exp->saved_addr, 0, sizeof(exp->saved_addr));
393 	memset(&exp->saved_proto, 0, sizeof(exp->saved_proto));
394 	exp->dir = 0;
395 #endif
396 }
397 EXPORT_SYMBOL_GPL(nf_ct_expect_init);
398 
nf_ct_expect_free_rcu(struct rcu_head * head)399 static void nf_ct_expect_free_rcu(struct rcu_head *head)
400 {
401 	struct nf_conntrack_expect *exp;
402 
403 	exp = container_of(head, struct nf_conntrack_expect, rcu);
404 	kmem_cache_free(nf_ct_expect_cachep, exp);
405 }
406 
nf_ct_expect_put(struct nf_conntrack_expect * exp)407 void nf_ct_expect_put(struct nf_conntrack_expect *exp)
408 {
409 	if (refcount_dec_and_test(&exp->use))
410 		call_rcu(&exp->rcu, nf_ct_expect_free_rcu);
411 }
412 EXPORT_SYMBOL_GPL(nf_ct_expect_put);
413 
nf_ct_expect_insert(struct nf_conntrack_expect * exp,struct nf_conn_help * master_help)414 static void nf_ct_expect_insert(struct nf_conntrack_expect *exp,
415 				struct nf_conn_help *master_help)
416 {
417 	struct nf_conntrack_net *cnet;
418 	struct nf_conntrack_helper *helper;
419 	struct net *net = nf_ct_exp_net(exp);
420 	unsigned int h = nf_ct_expect_dst_hash(net, &exp->tuple);
421 
422 	refcount_inc(&exp->use);
423 
424 	helper = rcu_dereference_protected(master_help->helper,
425 					   lockdep_is_held(&nf_conntrack_expect_lock));
426 	if (helper)
427 		exp->timeout += helper->expect_policy[exp->class].timeout * HZ;
428 
429 	hlist_add_head_rcu(&exp->lnode, &master_help->expectations);
430 	master_help->expecting[exp->class]++;
431 
432 	hlist_add_head_rcu(&exp->hnode, &nf_ct_expect_hash[h]);
433 	cnet = nf_ct_pernet(net);
434 	cnet->expect_count++;
435 
436 	NF_CT_STAT_INC(net, expect_create);
437 }
438 
evict_oldest_expect(struct nf_conn_help * master_help,struct nf_conntrack_expect * new,const struct nf_conntrack_expect_policy * p)439 static void evict_oldest_expect(struct nf_conn_help *master_help,
440 				struct nf_conntrack_expect *new,
441 				const struct nf_conntrack_expect_policy *p)
442 {
443 	struct nf_conntrack_expect *exp, *last = NULL;
444 	struct hlist_node *next;
445 
446 	hlist_for_each_entry_safe(exp, next, &master_help->expectations, lnode) {
447 		if (nf_ct_exp_is_expired(exp)) {
448 			nf_ct_unlink_expect(exp);
449 			continue;
450 		}
451 		if (exp->class == new->class)
452 			last = exp;
453 	}
454 
455 	/* Still worth to evict oldest expectation after garbage collection? */
456 	if (last &&
457 	    master_help->expecting[last->class] >= p->max_expected)
458 		nf_ct_unlink_expect(last);
459 }
460 
__nf_ct_expect_check(struct nf_conntrack_expect * expect,struct nf_conn_help * master_help,unsigned int flags)461 static inline int __nf_ct_expect_check(struct nf_conntrack_expect *expect,
462 				       struct nf_conn_help *master_help,
463 				       unsigned int flags)
464 {
465 	const struct nf_conntrack_expect_policy *p;
466 	struct nf_conntrack_expect *i;
467 	struct nf_conntrack_net *cnet;
468 	struct nf_conntrack_helper *helper;
469 	struct net *net = nf_ct_exp_net(expect);
470 	struct hlist_node *next;
471 	unsigned int h;
472 	int ret = 0;
473 
474 	lockdep_nfct_expect_lock_held();
475 
476 	h = nf_ct_expect_dst_hash(net, &expect->tuple);
477 	hlist_for_each_entry_safe(i, next, &nf_ct_expect_hash[h], hnode) {
478 		if (nf_ct_exp_is_expired(i)) {
479 			nf_ct_unlink_expect(i);
480 			continue;
481 		}
482 		if (master_matches(i, expect, flags) &&
483 		    expect_matches(i, expect)) {
484 			if (i->class != expect->class ||
485 			    i->master != expect->master)
486 				return -EALREADY;
487 
488 			nf_ct_unlink_expect(i);
489 			break;
490 		} else if (expect_clash(i, expect)) {
491 			ret = -EBUSY;
492 			goto out;
493 		}
494 	}
495 	/* Will be over limit? */
496 	helper = rcu_dereference_protected(master_help->helper,
497 					   lockdep_is_held(&nf_conntrack_expect_lock));
498 	if (helper) {
499 		p = &helper->expect_policy[expect->class];
500 		if (master_help->expecting[expect->class] >= p->max_expected)
501 			evict_oldest_expect(master_help, expect, p);
502 	} else {
503 		const struct nf_conntrack_expect_policy default_exp_policy = {
504 			.max_expected = NF_CT_EXPECT_MAX_CNT,
505 		};
506 
507 		if (master_help->expecting[expect->class] >= default_exp_policy.max_expected)
508 			evict_oldest_expect(master_help, expect, &default_exp_policy);
509 	}
510 
511 	cnet = nf_ct_pernet(net);
512 	if (cnet->expect_count >= nf_ct_expect_max) {
513 		net_warn_ratelimited("nf_conntrack: expectation table full\n");
514 		ret = -EMFILE;
515 	}
516 out:
517 	return ret;
518 }
519 
/*
 * Register @expect with its master under nf_conntrack_expect_lock and
 * emit an IPEXP_NEW event.
 *
 * Returns 0 on success, -ESHUTDOWN if the master has no helper area, or
 * the negative error reported by __nf_ct_expect_check().
 */
int nf_ct_expect_related_report(struct nf_conntrack_expect *expect,
				u32 portid, int report, unsigned int flags)
{
	struct nf_conn_help *help;
	int ret;

	spin_lock_bh(&nf_conntrack_expect_lock);

	help = nfct_help(expect->master);
	if (!help) {
		ret = -ESHUTDOWN;
	} else {
		ret = __nf_ct_expect_check(expect, help, flags);
		if (ret >= 0) {
			nf_ct_expect_insert(expect, help);
			nf_ct_expect_event_report(IPEXP_NEW, expect,
						  portid, report);
			ret = 0;
		}
	}

	spin_unlock_bh(&nf_conntrack_expect_lock);

	return ret;
}
EXPORT_SYMBOL_GPL(nf_ct_expect_related_report);
548 
/*
 * Walk the whole expectation hash (all namespaces) under
 * nf_conntrack_expect_lock and unlink every entry for which
 * @iter(exp, @data) returns true.
 */
void nf_ct_expect_iterate_destroy(bool (*iter)(struct nf_conntrack_expect *e, void *data),
				  void *data)
{
	struct nf_conntrack_expect *exp;
	const struct hlist_node *next;
	unsigned int bucket;

	spin_lock_bh(&nf_conntrack_expect_lock);

	for (bucket = 0; bucket < nf_ct_expect_hsize; bucket++) {
		hlist_for_each_entry_safe(exp, next,
					  &nf_ct_expect_hash[bucket],
					  hnode) {
			if (!iter(exp, data))
				continue;

			nf_ct_unlink_expect(exp);
		}
	}

	spin_unlock_bh(&nf_conntrack_expect_lock);
}
EXPORT_SYMBOL_GPL(nf_ct_expect_iterate_destroy);
570 
/*
 * Walk the expectation hash under nf_conntrack_expect_lock and, for
 * entries belonging to @net only, unlink those for which
 * @iter(exp, @data) returns true. @portid and @report are passed on to
 * the destroy event.
 */
void nf_ct_expect_iterate_net(struct net *net,
			      bool (*iter)(struct nf_conntrack_expect *e, void *data),
			      void *data,
			      u32 portid, int report)
{
	struct nf_conntrack_expect *exp;
	const struct hlist_node *next;
	unsigned int bucket;

	spin_lock_bh(&nf_conntrack_expect_lock);

	for (bucket = 0; bucket < nf_ct_expect_hsize; bucket++) {
		hlist_for_each_entry_safe(exp, next,
					  &nf_ct_expect_hash[bucket],
					  hnode) {
			/* @iter is only consulted for entries of @net. */
			if (net_eq(nf_ct_exp_net(exp), net) &&
			    iter(exp, data))
				nf_ct_unlink_expect_report(exp, portid, report);
		}
	}

	spin_unlock_bh(&nf_conntrack_expect_lock);
}
EXPORT_SYMBOL_GPL(nf_ct_expect_iterate_net);
598 
599 #ifdef CONFIG_NF_CONNTRACK_PROCFS
/* seq_file private state (seq->private) for walking the expectation hash. */
struct ct_expect_iter_state {
	struct seq_net_private p;
	/* Index of the hash bucket the iterator is currently in. */
	unsigned int bucket;
};
604 
ct_expect_get_first(struct seq_file * seq)605 static struct hlist_node *ct_expect_get_first(struct seq_file *seq)
606 {
607 	struct ct_expect_iter_state *st = seq->private;
608 	struct hlist_node *n;
609 
610 	for (st->bucket = 0; st->bucket < nf_ct_expect_hsize; st->bucket++) {
611 		n = rcu_dereference(hlist_first_rcu(&nf_ct_expect_hash[st->bucket]));
612 		if (n)
613 			return n;
614 	}
615 	return NULL;
616 }
617 
ct_expect_get_next(struct seq_file * seq,struct hlist_node * head)618 static struct hlist_node *ct_expect_get_next(struct seq_file *seq,
619 					     struct hlist_node *head)
620 {
621 	struct ct_expect_iter_state *st = seq->private;
622 
623 	head = rcu_dereference(hlist_next_rcu(head));
624 	while (head == NULL) {
625 		if (++st->bucket >= nf_ct_expect_hsize)
626 			return NULL;
627 		head = rcu_dereference(hlist_first_rcu(&nf_ct_expect_hash[st->bucket]));
628 	}
629 	return head;
630 }
631 
/*
 * Return the node @pos steps after the first one in the table, or NULL
 * if the table holds fewer entries than that.
 */
static struct hlist_node *ct_expect_get_idx(struct seq_file *seq, loff_t pos)
{
	struct hlist_node *n = ct_expect_get_first(seq);

	while (n && pos) {
		n = ct_expect_get_next(seq, n);
		if (n)
			pos--;
	}

	/* A non-zero remainder means the walk ran off the end. */
	return pos ? NULL : n;
}
641 
/*
 * seq_file ->start: enter an RCU read-side section (left again in
 * exp_seq_stop()) and seek to entry *@pos.
 */
static void *exp_seq_start(struct seq_file *seq, loff_t *pos)
	__acquires(RCU)
{
	loff_t idx = *pos;

	rcu_read_lock();
	return ct_expect_get_idx(seq, idx);
}
648 
/* seq_file ->next: bump the position and step to the following node. */
static void *exp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct hlist_node *cur = v;

	++*pos;
	return ct_expect_get_next(seq, cur);
}
654 
exp_seq_stop(struct seq_file * seq,void * v)655 static void exp_seq_stop(struct seq_file *seq, void *v)
656 	__releases(RCU)
657 {
658 	rcu_read_unlock();
659 }
660 
exp_seq_show(struct seq_file * s,void * v)661 static int exp_seq_show(struct seq_file *s, void *v)
662 {
663 	struct nf_conntrack_expect *expect;
664 	struct nf_conntrack_helper *helper;
665 	struct net *net = seq_file_net(s);
666 	struct hlist_node *n = v;
667 	char *delim = "";
668 	__s32 timeout;
669 
670 	expect = hlist_entry(n, struct nf_conntrack_expect, hnode);
671 
672 	if (!net_eq(nf_ct_exp_net(expect), net))
673 		return 0;
674 	if (nf_ct_exp_is_expired(expect))
675 		return 0;
676 
677 	timeout = (__s32)(READ_ONCE(expect->timeout) - nfct_time_stamp) / HZ;
678 	seq_printf(s, "%d ", timeout > 0 ? timeout : 0);
679 	seq_printf(s, "l3proto = %u proto=%u ",
680 		   expect->tuple.src.l3num,
681 		   expect->tuple.dst.protonum);
682 	print_tuple(s, &expect->tuple,
683 		    nf_ct_l4proto_find(expect->tuple.dst.protonum));
684 
685 	if (expect->flags & NF_CT_EXPECT_PERMANENT) {
686 		seq_puts(s, "PERMANENT");
687 		delim = ",";
688 	}
689 	if (expect->flags & NF_CT_EXPECT_INACTIVE) {
690 		seq_printf(s, "%sINACTIVE", delim);
691 		delim = ",";
692 	}
693 	if (expect->flags & NF_CT_EXPECT_USERSPACE)
694 		seq_printf(s, "%sUSERSPACE", delim);
695 
696 	helper = rcu_dereference(expect->helper);
697 	if (helper) {
698 		seq_printf(s, "%s%s", expect->flags ? " " : "", helper->name);
699 		if (helper->expect_policy[expect->class].name[0])
700 			seq_printf(s, "/%s",
701 				   helper->expect_policy[expect->class].name);
702 	}
703 
704 	seq_putc(s, '\n');
705 
706 	return 0;
707 }
708 
/* seq_file callbacks backing the "nf_conntrack_expect" proc entry. */
static const struct seq_operations exp_seq_ops = {
	.start = exp_seq_start,
	.next = exp_seq_next,
	.stop = exp_seq_stop,
	.show = exp_seq_show
};
715 #endif /* CONFIG_NF_CONNTRACK_PROCFS */
716 
/*
 * Create the "nf_conntrack_expect" entry under @net's proc directory and,
 * when uid 0 / gid 0 map to valid ids in the namespace's user_ns, hand
 * ownership of the entry to them.
 *
 * Returns 0 on success (or when procfs support is compiled out) and
 * -ENOMEM if the entry cannot be created.
 */
static int exp_proc_init(struct net *net)
{
#ifdef CONFIG_NF_CONNTRACK_PROCFS
	struct proc_dir_entry *entry;
	kuid_t uid;
	kgid_t gid;

	entry = proc_create_net("nf_conntrack_expect", 0440, net->proc_net,
				&exp_seq_ops,
				sizeof(struct ct_expect_iter_state));
	if (!entry)
		return -ENOMEM;

	uid = make_kuid(net->user_ns, 0);
	gid = make_kgid(net->user_ns, 0);
	if (uid_valid(uid) && gid_valid(gid))
		proc_set_user(entry, uid, gid);
#endif /* CONFIG_NF_CONNTRACK_PROCFS */
	return 0;
}
736 
/* Remove @net's "nf_conntrack_expect" proc entry; no-op without procfs support. */
static void exp_proc_remove(struct net *net)
{
#ifdef CONFIG_NF_CONNTRACK_PROCFS
	remove_proc_entry("nf_conntrack_expect", net->proc_net);
#endif /* CONFIG_NF_CONNTRACK_PROCFS */
}
743 
/*
 * Expose nf_ct_expect_hsize as the "expect_hashsize" module parameter
 * (mode 0400). nf_conntrack_expect_init() only derives a default size
 * when this is left at zero.
 */
module_param_named(expect_hashsize, nf_ct_expect_hsize, uint, 0400);
745 
/* Per-namespace setup: create the proc entry. Returns 0 or a negative errno. */
int nf_conntrack_expect_pernet_init(struct net *net)
{
	int err = exp_proc_init(net);

	return err;
}
750 
/* Per-namespace teardown: remove the proc entry created at init time. */
void nf_conntrack_expect_pernet_fini(struct net *net)
{
	exp_proc_remove(net);
}
755 
nf_conntrack_expect_init(void)756 int nf_conntrack_expect_init(void)
757 {
758 	if (!nf_ct_expect_hsize) {
759 		nf_ct_expect_hsize = nf_conntrack_htable_size / 256;
760 		if (!nf_ct_expect_hsize)
761 			nf_ct_expect_hsize = 1;
762 	}
763 	nf_ct_expect_max = nf_ct_expect_hsize * 4;
764 	nf_ct_expect_cachep = KMEM_CACHE(nf_conntrack_expect, 0);
765 	if (!nf_ct_expect_cachep)
766 		return -ENOMEM;
767 
768 	nf_ct_expect_hash = nf_ct_alloc_hashtable(&nf_ct_expect_hsize, 0);
769 	if (!nf_ct_expect_hash) {
770 		kmem_cache_destroy(nf_ct_expect_cachep);
771 		return -ENOMEM;
772 	}
773 
774 	return 0;
775 }
776 
nf_conntrack_expect_fini(void)777 void nf_conntrack_expect_fini(void)
778 {
779 	rcu_barrier(); /* Wait for call_rcu() before destroy */
780 	kmem_cache_destroy(nf_ct_expect_cachep);
781 	kvfree(nf_ct_expect_hash);
782 }
783