xref: /linux/net/netfilter/nf_conncount.c (revision c8b6f36f766991e3ebebec6596daee4b04dcbc49)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * count the number of connections matching an arbitrary key.
4  *
5  * (C) 2017 Red Hat GmbH
6  * Author: Florian Westphal <fw@strlen.de>
7  *
8  * split from xt_connlimit.c:
9  *   (c) 2000 Gerd Knorr <kraxel@bytesex.org>
10  *   Nov 2002: Martin Bene <martin.bene@icomedias.com>:
11  *		only ignore TIME_WAIT or gone connections
12  *   (C) CC Computer Consultants GmbH, 2007
13  */
14 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
15 #include <linux/in.h>
16 #include <linux/in6.h>
17 #include <linux/ip.h>
18 #include <linux/ipv6.h>
19 #include <linux/jhash.h>
20 #include <linux/slab.h>
21 #include <linux/list.h>
22 #include <linux/rbtree.h>
23 #include <linux/module.h>
24 #include <linux/random.h>
25 #include <linux/skbuff.h>
26 #include <linux/spinlock.h>
27 #include <linux/netfilter/nf_conntrack_tcp.h>
28 #include <linux/netfilter/x_tables.h>
29 #include <net/netfilter/nf_conntrack.h>
30 #include <net/netfilter/nf_conntrack_count.h>
31 #include <net/netfilter/nf_conntrack_core.h>
32 #include <net/netfilter/nf_conntrack_tuple.h>
33 #include <net/netfilter/nf_conntrack_zones.h>
34 
/* Number of rb-tree roots in nf_conncount_data; keys are hashed modulo this. */
#define CONNCOUNT_SLOTS 256U

/* Capacity of the on-stack arrays holding tree nodes queued for freeing. */
#define CONNCOUNT_GC_MAX_NODES 8
/* Eviction budget the per-list walks compare their collect counter against. */
#define CONNCOUNT_GC_MAX_COLLECT 64
/* Largest supported key, counted in u32 words. */
#define MAX_KEYLEN 5
40 
41 /* we will save the tuples of all connections we care about */
42 struct nf_conncount_tuple {
43 	struct list_head		node;
44 	struct nf_conntrack_tuple	tuple;
45 	struct nf_conntrack_zone	zone;
46 	int				cpu;
47 	u32				jiffies32;
48 };
49 
50 struct nf_conncount_rb {
51 	struct rb_node node;
52 	struct nf_conncount_list list;
53 	u32 key[MAX_KEYLEN];
54 	struct rcu_head rcu_head;
55 };
56 
57 struct nf_conncount_root {
58 	struct rb_root root;
59 	spinlock_t lock;
60 	seqcount_spinlock_t count;
61 };
62 
63 struct nf_conncount_data {
64 	unsigned int keylen;
65 	u32 initval;
66 	struct nf_conncount_root root[CONNCOUNT_SLOTS];
67 	struct net *net;
68 	struct work_struct gc_work;
69 	unsigned long pending_trees[BITS_TO_LONGS(CONNCOUNT_SLOTS)];
70 	unsigned int gc_tree;
71 };
72 
73 static struct kmem_cache *conncount_rb_cachep __read_mostly;
74 static struct kmem_cache *conncount_conn_cachep __read_mostly;
75 
76 static inline bool already_closed(const struct nf_conn *conn)
77 {
78 	if (nf_ct_protonum(conn) == IPPROTO_TCP)
79 		return conn->proto.tcp.state == TCP_CONNTRACK_TIME_WAIT ||
80 		       conn->proto.tcp.state == TCP_CONNTRACK_CLOSE;
81 	else
82 		return false;
83 }
84 
85 static int key_diff(const u32 *a, const u32 *b, unsigned int klen)
86 {
87 	return memcmp(a, b, klen * sizeof(u32));
88 }
89 
90 static void conn_free(struct nf_conncount_list *list,
91 		      struct nf_conncount_tuple *conn)
92 {
93 	lockdep_assert_held(&list->list_lock);
94 
95 	list->count--;
96 	list_del(&conn->node);
97 
98 	kmem_cache_free(conncount_conn_cachep, conn);
99 }
100 
101 static const struct nf_conntrack_tuple_hash *
102 find_or_evict(struct net *net, struct nf_conncount_list *list,
103 	      struct nf_conncount_tuple *conn)
104 {
105 	const struct nf_conntrack_tuple_hash *found;
106 	unsigned long a, b;
107 	int cpu = raw_smp_processor_id();
108 	u32 age;
109 
110 	found = nf_conntrack_find_get(net, &conn->zone, &conn->tuple);
111 	if (found)
112 		return found;
113 	b = conn->jiffies32;
114 	a = (u32)jiffies;
115 
116 	/* conn might have been added just before by another cpu and
117 	 * might still be unconfirmed.  In this case, nf_conntrack_find()
118 	 * returns no result.  Thus only evict if this cpu added the
119 	 * stale entry or if the entry is older than two jiffies.
120 	 */
121 	age = a - b;
122 	if (conn->cpu == cpu || age >= 2) {
123 		conn_free(list, conn);
124 		return ERR_PTR(-ENOENT);
125 	}
126 
127 	return ERR_PTR(-EAGAIN);
128 }
129 
130 static bool get_ct_or_tuple_from_skb(struct net *net,
131 				     const struct sk_buff *skb,
132 				     u16 l3num,
133 				     struct nf_conn **ct,
134 				     struct nf_conntrack_tuple *tuple,
135 				     const struct nf_conntrack_zone **zone,
136 				     bool *refcounted)
137 {
138 	const struct nf_conntrack_tuple_hash *h;
139 	enum ip_conntrack_info ctinfo;
140 	struct nf_conn *found_ct;
141 
142 	found_ct = nf_ct_get(skb, &ctinfo);
143 	if (found_ct && !nf_ct_is_template(found_ct)) {
144 		*tuple = found_ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
145 		*zone = nf_ct_zone(found_ct);
146 		*ct = found_ct;
147 		return true;
148 	}
149 
150 	if (!nf_ct_get_tuplepr(skb, skb_network_offset(skb), l3num, net, tuple))
151 		return false;
152 
153 	if (found_ct)
154 		*zone = nf_ct_zone(found_ct);
155 
156 	h = nf_conntrack_find_get(net, *zone, tuple);
157 	if (!h)
158 		return true;
159 
160 	found_ct = nf_ct_tuplehash_to_ctrack(h);
161 	*refcounted = true;
162 	*ct = found_ct;
163 
164 	return true;
165 }
166 
167 static int __nf_conncount_add(struct net *net,
168 			      const struct sk_buff *skb,
169 			      u16 l3num,
170 			      struct nf_conncount_list *list)
171 {
172 	const struct nf_conntrack_zone *zone = &nf_ct_zone_dflt;
173 	const struct nf_conntrack_tuple_hash *found;
174 	struct nf_conncount_tuple *conn, *conn_n;
175 	struct nf_conntrack_tuple tuple;
176 	struct nf_conn *ct = NULL;
177 	struct nf_conn *found_ct;
178 	unsigned int collect = 0;
179 	bool refcounted = false;
180 	int err = 0;
181 
182 	if (!get_ct_or_tuple_from_skb(net, skb, l3num, &ct, &tuple, &zone, &refcounted))
183 		return -ENOENT;
184 
185 	if (ct && nf_ct_is_confirmed(ct)) {
186 		/* Connection is confirmed but might still be in the setup phase.
187 		 * Only skip the tracking if it is fully assured. This guarantees
188 		 * that setup packets or retransmissions are properly counted and
189 		 * deduplicated.
190 		 */
191 		if (test_bit(IPS_ASSURED_BIT, &ct->status)) {
192 			err = -EEXIST;
193 			goto out_put;
194 		}
195 
196 		goto check_connections;
197 	}
198 
199 	if ((u32)jiffies == list->last_gc &&
200 	    (list->count - list->last_gc_count) < CONNCOUNT_GC_MAX_COLLECT)
201 		goto add_new_node;
202 
203 check_connections:
204 	/* check the saved connections */
205 	list_for_each_entry_safe(conn, conn_n, &list->head, node) {
206 		if (collect > CONNCOUNT_GC_MAX_COLLECT)
207 			break;
208 
209 		found = find_or_evict(net, list, conn);
210 		if (IS_ERR(found)) {
211 			/* Not found, but might be about to be confirmed */
212 			if (PTR_ERR(found) == -EAGAIN) {
213 				if (nf_ct_tuple_equal(&conn->tuple, &tuple) &&
214 				    nf_ct_zone_id(&conn->zone, conn->zone.dir) ==
215 				    nf_ct_zone_id(zone, zone->dir))
216 					goto out_put; /* already exists */
217 			} else {
218 				collect++;
219 			}
220 			continue;
221 		}
222 
223 		found_ct = nf_ct_tuplehash_to_ctrack(found);
224 
225 		if (nf_ct_tuple_equal(&conn->tuple, &tuple) &&
226 		    nf_ct_zone_equal(found_ct, zone, zone->dir)) {
227 			/*
228 			 * We should not see tuples twice unless someone hooks
229 			 * this into a table without "-p tcp --syn".
230 			 *
231 			 * Attempt to avoid a re-add in this case.
232 			 */
233 			nf_ct_put(found_ct);
234 			goto out_put;
235 		} else if (already_closed(found_ct)) {
236 			/*
237 			 * we do not care about connections which are
238 			 * closed already -> ditch it
239 			 */
240 			nf_ct_put(found_ct);
241 			conn_free(list, conn);
242 			collect++;
243 			continue;
244 		}
245 
246 		nf_ct_put(found_ct);
247 	}
248 	list->last_gc = (u32)jiffies;
249 	list->last_gc_count = list->count;
250 
251 add_new_node:
252 	if (WARN_ON_ONCE(list->count > INT_MAX)) {
253 		err = -EOVERFLOW;
254 		goto out_put;
255 	}
256 
257 	conn = kmem_cache_alloc(conncount_conn_cachep, GFP_ATOMIC);
258 	if (conn == NULL) {
259 		err = -ENOMEM;
260 		goto out_put;
261 	}
262 
263 	conn->tuple = tuple;
264 	conn->zone = *zone;
265 	conn->cpu = raw_smp_processor_id();
266 	conn->jiffies32 = (u32)jiffies;
267 	list_add_tail(&conn->node, &list->head);
268 	list->count++;
269 
270 out_put:
271 	if (refcounted)
272 		nf_ct_put(ct);
273 	return err;
274 }
275 
276 int nf_conncount_add_skb(struct net *net,
277 			 const struct sk_buff *skb,
278 			 u16 l3num,
279 			 struct nf_conncount_list *list)
280 {
281 	int ret;
282 
283 	/* check the saved connections */
284 	spin_lock_bh(&list->list_lock);
285 	ret = __nf_conncount_add(net, skb, l3num, list);
286 	spin_unlock_bh(&list->list_lock);
287 
288 	return ret;
289 }
290 EXPORT_SYMBOL_GPL(nf_conncount_add_skb);
291 
292 void nf_conncount_list_init(struct nf_conncount_list *list)
293 {
294 	spin_lock_init(&list->list_lock);
295 	INIT_LIST_HEAD(&list->head);
296 	list->count = 0;
297 	list->last_gc_count = 0;
298 	list->last_gc = (u32)jiffies;
299 }
300 EXPORT_SYMBOL_GPL(nf_conncount_list_init);
301 
302 /* Return true if the list is empty. Must be called with BH disabled. */
303 static bool __nf_conncount_gc_list(struct net *net,
304 				   struct nf_conncount_list *list)
305 {
306 	const struct nf_conntrack_tuple_hash *found;
307 	struct nf_conncount_tuple *conn, *conn_n;
308 	struct nf_conn *found_ct;
309 	unsigned int collected = 0;
310 	bool ret = false;
311 
312 	/* don't bother if we just did GC */
313 	if ((u32)jiffies == READ_ONCE(list->last_gc))
314 		return false;
315 
316 	list_for_each_entry_safe(conn, conn_n, &list->head, node) {
317 		found = find_or_evict(net, list, conn);
318 		if (IS_ERR(found)) {
319 			if (PTR_ERR(found) == -ENOENT)
320 				collected++;
321 			continue;
322 		}
323 
324 		found_ct = nf_ct_tuplehash_to_ctrack(found);
325 		if (already_closed(found_ct)) {
326 			/*
327 			 * we do not care about connections which are
328 			 * closed already -> ditch it
329 			 */
330 			nf_ct_put(found_ct);
331 			conn_free(list, conn);
332 			collected++;
333 			continue;
334 		}
335 
336 		nf_ct_put(found_ct);
337 		if (collected > CONNCOUNT_GC_MAX_COLLECT)
338 			break;
339 	}
340 
341 	if (!list->count)
342 		ret = true;
343 	list->last_gc = (u32)jiffies;
344 	list->last_gc_count = list->count;
345 
346 	return ret;
347 }
348 
349 bool nf_conncount_gc_list(struct net *net,
350 			  struct nf_conncount_list *list)
351 {
352 	bool ret;
353 
354 	/* don't bother if other cpu is already doing GC */
355 	if (!spin_trylock_bh(&list->list_lock))
356 		return false;
357 
358 	ret = __nf_conncount_gc_list(net, list);
359 	spin_unlock_bh(&list->list_lock);
360 
361 	return ret;
362 }
363 EXPORT_SYMBOL_GPL(nf_conncount_gc_list);
364 
365 static void __tree_nodes_free(struct rcu_head *h)
366 {
367 	struct nf_conncount_rb *rbconn;
368 
369 	rbconn = container_of(h, struct nf_conncount_rb, rcu_head);
370 	kmem_cache_free(conncount_rb_cachep, rbconn);
371 }
372 
373 static void tree_nodes_free(struct nf_conncount_root *root,
374 			    struct nf_conncount_rb *gc_nodes[],
375 			    unsigned int gc_count)
376 {
377 	struct nf_conncount_rb *rbconn;
378 
379 	lockdep_assert_held(&root->lock);
380 
381 	while (gc_count) {
382 		rbconn = gc_nodes[--gc_count];
383 		spin_lock(&rbconn->list.list_lock);
384 		if (!rbconn->list.count) {
385 			write_seqcount_begin(&root->count);
386 			rb_erase(&rbconn->node, &root->root);
387 			call_rcu(&rbconn->rcu_head, __tree_nodes_free);
388 			write_seqcount_end(&root->count);
389 		}
390 		spin_unlock(&rbconn->list.list_lock);
391 	}
392 }
393 
394 static void schedule_gc_worker(struct nf_conncount_data *data, int tree)
395 {
396 	set_bit(tree, data->pending_trees);
397 	schedule_work(&data->gc_work);
398 }
399 
400 static unsigned int
401 insert_tree(struct net *net,
402 	    const struct sk_buff *skb,
403 	    u16 l3num,
404 	    struct nf_conncount_data *data,
405 	    unsigned int hash,
406 	    const u32 *key)
407 {
408 	struct nf_conncount_root *root = &data->root[hash];
409 	struct nf_conncount_rb *gc_nodes[CONNCOUNT_GC_MAX_NODES];
410 	const struct nf_conntrack_zone *zone = &nf_ct_zone_dflt;
411 	bool do_gc = true, refcounted = false;
412 	unsigned int count = 0, gc_count = 0;
413 	struct rb_node **rbnode, *parent;
414 	struct nf_conntrack_tuple tuple;
415 	struct nf_conncount_tuple *conn;
416 	struct nf_conncount_rb *rbconn;
417 	struct nf_conn *ct = NULL;
418 
419 	spin_lock_bh(&root->lock);
420 restart:
421 	parent = NULL;
422 	rbnode = &root->root.rb_node;
423 	while (*rbnode) {
424 		int diff;
425 		rbconn = rb_entry(*rbnode, struct nf_conncount_rb, node);
426 
427 		parent = *rbnode;
428 		diff = key_diff(key, rbconn->key, data->keylen);
429 		if (diff < 0) {
430 			rbnode = &((*rbnode)->rb_left);
431 		} else if (diff > 0) {
432 			rbnode = &((*rbnode)->rb_right);
433 		} else {
434 			int ret;
435 
436 			ret = nf_conncount_add_skb(net, skb, l3num, &rbconn->list);
437 			if (ret && ret != -EEXIST)
438 				count = 0; /* hotdrop */
439 			else
440 				count = rbconn->list.count;
441 			tree_nodes_free(root, gc_nodes, gc_count);
442 			goto out_unlock;
443 		}
444 
445 		if (gc_count >= ARRAY_SIZE(gc_nodes))
446 			continue;
447 
448 		if (do_gc && nf_conncount_gc_list(net, &rbconn->list))
449 			gc_nodes[gc_count++] = rbconn;
450 	}
451 
452 	if (gc_count) {
453 		tree_nodes_free(root, gc_nodes, gc_count);
454 		schedule_gc_worker(data, hash);
455 		gc_count = 0;
456 		do_gc = false;
457 		goto restart;
458 	}
459 
460 	if (get_ct_or_tuple_from_skb(net, skb, l3num, &ct, &tuple, &zone, &refcounted)) {
461 		/* expected case: match, insert new node */
462 		rbconn = kmem_cache_alloc(conncount_rb_cachep, GFP_ATOMIC);
463 		if (rbconn == NULL)
464 			goto out_unlock;
465 
466 		conn = kmem_cache_alloc(conncount_conn_cachep, GFP_ATOMIC);
467 		if (conn == NULL) {
468 			kmem_cache_free(conncount_rb_cachep, rbconn);
469 			goto out_unlock;
470 		}
471 
472 		conn->tuple = tuple;
473 		conn->zone = *zone;
474 		conn->cpu = raw_smp_processor_id();
475 		conn->jiffies32 = (u32)jiffies;
476 		memcpy(rbconn->key, key, sizeof(u32) * data->keylen);
477 
478 		nf_conncount_list_init(&rbconn->list);
479 		list_add(&conn->node, &rbconn->list.head);
480 		count = 1;
481 		rbconn->list.count = count;
482 
483 		write_seqcount_begin(&root->count);
484 		rb_link_node_rcu(&rbconn->node, parent, rbnode);
485 		rb_insert_color(&rbconn->node, &root->root);
486 		write_seqcount_end(&root->count);
487 	}
488 out_unlock:
489 	if (refcounted)
490 		nf_ct_put(ct);
491 	spin_unlock_bh(&root->lock);
492 	return count;
493 }
494 
495 static struct nf_conncount_rb *
496 find_tree_node(struct nf_conncount_root *root, struct nf_conncount_data *data,
497 	       const u32 *key)
498 {
499 	unsigned int seq = read_seqcount_begin(&root->count);
500 	struct rb_node *parent;
501 
502 	parent = rcu_dereference_check(root->root.rb_node,
503 				       lockdep_is_held(&root->lock));
504 	while (parent) {
505 		struct nf_conncount_rb *rbconn;
506 		int diff;
507 
508 		rbconn = rb_entry(parent, struct nf_conncount_rb, node);
509 
510 		diff = key_diff(key, rbconn->key, data->keylen);
511 		if (diff < 0)
512 			parent = rcu_dereference_check(parent->rb_left,
513 						       lockdep_is_held(&root->lock));
514 		else if (diff > 0)
515 			parent = rcu_dereference_check(parent->rb_right,
516 						       lockdep_is_held(&root->lock));
517 		else
518 			return rbconn;
519 
520 		if (read_seqcount_retry(&root->count, seq))
521 			return ERR_PTR(-EAGAIN);
522 	}
523 
524 	if (read_seqcount_retry(&root->count, seq))
525 		return ERR_PTR(-EAGAIN);
526 
527 	return ERR_PTR(-ENOENT);
528 }
529 
530 static unsigned int
531 count_tree(struct net *net,
532 	   const struct sk_buff *skb,
533 	   u16 l3num,
534 	   struct nf_conncount_data *data,
535 	   const u32 *key)
536 {
537 	struct nf_conncount_root *root;
538 	struct nf_conncount_rb *rbconn;
539 	unsigned int hash;
540 	int ret;
541 
542 	hash = jhash2(key, data->keylen, data->initval) % CONNCOUNT_SLOTS;
543 	root = &data->root[hash];
544 
545 	rbconn = find_tree_node(root, data, key);
546 	if (IS_ERR(rbconn)) {
547 		if (PTR_ERR(rbconn) == -EAGAIN) {
548 			spin_lock_bh(&root->lock);
549 			rbconn = find_tree_node(root, data, key);
550 			spin_unlock_bh(&root->lock);
551 		}
552 
553 		if (PTR_ERR(rbconn) == -ENOENT) {
554 			if (!skb)
555 				return 0;
556 
557 			return insert_tree(net, skb, l3num, data, hash, key);
558 		}
559 		DEBUG_NET_WARN_ON_ONCE(IS_ERR(rbconn));
560 	}
561 
562 	DEBUG_NET_WARN_ON_ONCE(IS_ERR_OR_NULL(rbconn));
563 	if (IS_ERR_OR_NULL(rbconn))
564 		return 0;
565 
566 	if (!skb) {
567 		nf_conncount_gc_list(net, &rbconn->list);
568 		return rbconn->list.count;
569 	}
570 
571 	spin_lock_bh(&rbconn->list.list_lock);
572 	/* Node might be about to be free'd.
573 	 * We need to defer to insert_tree() in this case.
574 	 */
575 	if (rbconn->list.count == 0) {
576 		spin_unlock_bh(&rbconn->list.list_lock);
577 		return insert_tree(net, skb, l3num, data, hash, key);
578 	}
579 
580 	/* same source network -> be counted! */
581 	ret = __nf_conncount_add(net, skb, l3num, &rbconn->list);
582 	spin_unlock_bh(&rbconn->list.list_lock);
583 
584 	if (ret && ret != -EEXIST)
585 		return 0; /* hotdrop */
586 	/* -EEXIST means add was skipped, update the list */
587 	if (ret == -EEXIST)
588 		nf_conncount_gc_list(net, &rbconn->list);
589 
590 	return rbconn->list.count;
591 }
592 
593 static void tree_gc_worker(struct work_struct *work)
594 {
595 	struct nf_conncount_data *data = container_of(work, struct nf_conncount_data, gc_work);
596 	struct nf_conncount_rb *gc_nodes[CONNCOUNT_GC_MAX_NODES], *rbconn;
597 	unsigned int tree, next_tree, gc_count = 0;
598 	struct nf_conncount_root *root;
599 	struct rb_node *node;
600 
601 	if (data->gc_tree == 0)
602 		data->gc_tree = find_first_bit(data->pending_trees, CONNCOUNT_SLOTS);
603 
604 	tree = data->gc_tree % CONNCOUNT_SLOTS;
605 	root = &data->root[tree];
606 
607 	spin_lock_bh(&root->lock);
608 	gc_count = 0;
609 	node = rb_first(&root->root);
610 	while (node != NULL) {
611 		u32 key[MAX_KEYLEN];
612 		bool drop_lock;
613 
614 		rbconn = rb_entry(node, struct nf_conncount_rb, node);
615 		node = rb_next(node);
616 
617 		if (nf_conncount_gc_list(data->net, &rbconn->list))
618 			gc_nodes[gc_count++] = rbconn;
619 
620 		drop_lock = need_resched();
621 
622 		if (drop_lock || gc_count >= ARRAY_SIZE(gc_nodes)) {
623 			tree_nodes_free(root, gc_nodes, gc_count);
624 			gc_count = 0;
625 		}
626 
627 		if (!drop_lock || !node)
628 			continue;
629 
630 		rbconn = rb_entry(node, struct nf_conncount_rb, node);
631 		memcpy(key, rbconn->key, sizeof(key));
632 		spin_unlock_bh(&root->lock);
633 
634 		cond_resched();
635 
636 		spin_lock_bh(&root->lock);
637 		rbconn = find_tree_node(root, data, key);
638 		if (IS_ERR_OR_NULL(rbconn)) /* rbconn was reaped */
639 			break;
640 
641 		node = &rbconn->node;
642 	}
643 
644 	tree_nodes_free(root, gc_nodes, gc_count);
645 	clear_bit(tree, data->pending_trees);
646 
647 	next_tree = (tree + 1) % CONNCOUNT_SLOTS;
648 	next_tree = find_next_bit(data->pending_trees, CONNCOUNT_SLOTS, next_tree);
649 
650 	if (next_tree < CONNCOUNT_SLOTS) {
651 		data->gc_tree = next_tree;
652 		schedule_work(work);
653 	} else {
654 		data->gc_tree = 0;
655 	}
656 
657 	spin_unlock_bh(&root->lock);
658 }
659 
660 /* Count and return number of conntrack entries in 'net' with particular 'key'.
661  * If 'skb' is not null, insert the corresponding tuple into the accounting
662  * data structure. Call with RCU read lock.
663  */
664 unsigned int nf_conncount_count_skb(struct net *net,
665 				    const struct sk_buff *skb,
666 				    u16 l3num,
667 				    struct nf_conncount_data *data,
668 				    const u32 *key)
669 {
670 	return count_tree(net, skb, l3num, data, key);
671 
672 }
673 EXPORT_SYMBOL_GPL(nf_conncount_count_skb);
674 
675 static void nf_conncount_root_init(struct nf_conncount_root *r)
676 {
677 	r->root = RB_ROOT;
678 	spin_lock_init(&r->lock);
679 	seqcount_spinlock_init(&r->count, &r->lock);
680 }
681 
682 struct nf_conncount_data *nf_conncount_init(struct net *net, unsigned int keylen)
683 {
684 	struct nf_conncount_data *data;
685 	int i;
686 
687 	if (keylen % sizeof(u32) ||
688 	    keylen / sizeof(u32) > MAX_KEYLEN ||
689 	    keylen == 0)
690 		return ERR_PTR(-EINVAL);
691 
692 	data = kvzalloc_obj(*data);
693 	if (!data)
694 		return ERR_PTR(-ENOMEM);
695 
696 	for (i = 0; i < ARRAY_SIZE(data->root); ++i)
697 		nf_conncount_root_init(&data->root[i]);
698 
699 	data->keylen = keylen / sizeof(u32);
700 	data->net = net;
701 	data->initval = get_random_u32();
702 	INIT_WORK(&data->gc_work, tree_gc_worker);
703 
704 	return data;
705 }
706 EXPORT_SYMBOL_GPL(nf_conncount_init);
707 
708 void nf_conncount_cache_free(struct nf_conncount_list *list)
709 {
710 	struct nf_conncount_tuple *conn, *conn_n;
711 
712 	list_for_each_entry_safe(conn, conn_n, &list->head, node)
713 		kmem_cache_free(conncount_conn_cachep, conn);
714 }
715 EXPORT_SYMBOL_GPL(nf_conncount_cache_free);
716 
717 static void destroy_tree(struct nf_conncount_root *r)
718 {
719 	struct nf_conncount_rb *rbconn;
720 	struct rb_node *node;
721 
722 	while ((node = rb_first(&r->root)) != NULL) {
723 		rbconn = rb_entry(node, struct nf_conncount_rb, node);
724 
725 		rb_erase(node, &r->root);
726 
727 		nf_conncount_cache_free(&rbconn->list);
728 
729 		kmem_cache_free(conncount_rb_cachep, rbconn);
730 	}
731 }
732 
733 void nf_conncount_destroy(struct net *net, struct nf_conncount_data *data)
734 {
735 	unsigned int i;
736 
737 	disable_work_sync(&data->gc_work);
738 
739 	for (i = 0; i < ARRAY_SIZE(data->root); ++i)
740 		destroy_tree(&data->root[i]);
741 
742 	kvfree(data);
743 }
744 EXPORT_SYMBOL_GPL(nf_conncount_destroy);
745 
746 static int __init nf_conncount_modinit(void)
747 {
748 	conncount_conn_cachep = KMEM_CACHE(nf_conncount_tuple, 0);
749 	if (!conncount_conn_cachep)
750 		return -ENOMEM;
751 
752 	conncount_rb_cachep = KMEM_CACHE(nf_conncount_rb, 0);
753 	if (!conncount_rb_cachep) {
754 		kmem_cache_destroy(conncount_conn_cachep);
755 		return -ENOMEM;
756 	}
757 
758 	return 0;
759 }
760 
761 static void __exit nf_conncount_modexit(void)
762 {
763 	rcu_barrier();
764 	kmem_cache_destroy(conncount_conn_cachep);
765 	kmem_cache_destroy(conncount_rb_cachep);
766 }
767 
768 module_init(nf_conncount_modinit);
769 module_exit(nf_conncount_modexit);
770 MODULE_AUTHOR("Jan Engelhardt <jengelh@medozas.de>");
771 MODULE_AUTHOR("Florian Westphal <fw@strlen.de>");
772 MODULE_DESCRIPTION("netfilter: count number of connections matching a key");
773 MODULE_LICENSE("GPL");
774