xref: /linux/net/ipv6/ip6_fib.c (revision f3d9478b2ce468c3115b02ecae7e975990697f15)
1 /*
2  *	Linux INET6 implementation
3  *	Forwarding Information Database
4  *
5  *	Authors:
6  *	Pedro Roque		<roque@di.fc.ul.pt>
7  *
8  *	$Id: ip6_fib.c,v 1.25 2001/10/31 21:55:55 davem Exp $
9  *
10  *	This program is free software; you can redistribute it and/or
11  *      modify it under the terms of the GNU General Public License
12  *      as published by the Free Software Foundation; either version
13  *      2 of the License, or (at your option) any later version.
14  */
15 
16 /*
17  * 	Changes:
18  * 	Yuji SEKIYA @USAGI:	Support default route on router node;
19  * 				remove ip6_null_entry from the top of
20  * 				routing table.
21  */
22 #include <linux/config.h>
23 #include <linux/errno.h>
24 #include <linux/types.h>
25 #include <linux/net.h>
26 #include <linux/route.h>
27 #include <linux/netdevice.h>
28 #include <linux/in6.h>
29 #include <linux/init.h>
30 
31 #ifdef 	CONFIG_PROC_FS
32 #include <linux/proc_fs.h>
33 #endif
34 
35 #include <net/ipv6.h>
36 #include <net/ndisc.h>
37 #include <net/addrconf.h>
38 
39 #include <net/ip6_fib.h>
40 #include <net/ip6_route.h>
41 
42 #define RT6_DEBUG 2
43 
44 #if RT6_DEBUG >= 3
45 #define RT6_TRACE(x...) printk(KERN_DEBUG x)
46 #else
47 #define RT6_TRACE(x...) do { ; } while (0)
48 #endif
49 
50 struct rt6_statistics	rt6_stats;
51 
52 static kmem_cache_t * fib6_node_kmem __read_mostly;
53 
54 enum fib_walk_state_t
55 {
56 #ifdef CONFIG_IPV6_SUBTREES
57 	FWS_S,
58 #endif
59 	FWS_L,
60 	FWS_R,
61 	FWS_C,
62 	FWS_U
63 };
64 
65 struct fib6_cleaner_t
66 {
67 	struct fib6_walker_t w;
68 	int (*func)(struct rt6_info *, void *arg);
69 	void *arg;
70 };
71 
72 DEFINE_RWLOCK(fib6_walker_lock);
73 
74 
75 #ifdef CONFIG_IPV6_SUBTREES
76 #define FWS_INIT FWS_S
77 #define SUBTREE(fn) ((fn)->subtree)
78 #else
79 #define FWS_INIT FWS_L
80 #define SUBTREE(fn) NULL
81 #endif
82 
83 static void fib6_prune_clones(struct fib6_node *fn, struct rt6_info *rt);
84 static struct fib6_node * fib6_repair_tree(struct fib6_node *fn);
85 
86 /*
87  *	A routing update causes an increase of the serial number on the
88  *	affected subtree. This allows for cached routes to be asynchronously
89  *	tested when modifications are made to the destination cache as a
90  *	result of redirects, path MTU changes, etc.
91  */
92 
93 static __u32 rt_sernum;
94 
95 static DEFINE_TIMER(ip6_fib_timer, fib6_run_gc, 0, 0);
96 
97 struct fib6_walker_t fib6_walker_list = {
98 	.prev	= &fib6_walker_list,
99 	.next	= &fib6_walker_list,
100 };
101 
102 #define FOR_WALKERS(w) for ((w)=fib6_walker_list.next; (w) != &fib6_walker_list; (w)=(w)->next)
103 
104 static __inline__ u32 fib6_new_sernum(void)
105 {
106 	u32 n = ++rt_sernum;
107 	if ((__s32)n <= 0)
108 		rt_sernum = n = 1;
109 	return n;
110 }
111 
112 /*
113  *	Auxiliary address test functions for the radix tree.
114  *
115  *	These assume a 32bit processor (although it will work on
116  *	64bit processors)
117  */
118 
119 /*
120  *	test bit
121  */
122 
123 static __inline__ int addr_bit_set(void *token, int fn_bit)
124 {
125 	__u32 *addr = token;
126 
127 	return htonl(1 << ((~fn_bit)&0x1F)) & addr[fn_bit>>5];
128 }
129 
130 static __inline__ struct fib6_node * node_alloc(void)
131 {
132 	struct fib6_node *fn;
133 
134 	if ((fn = kmem_cache_alloc(fib6_node_kmem, SLAB_ATOMIC)) != NULL)
135 		memset(fn, 0, sizeof(struct fib6_node));
136 
137 	return fn;
138 }
139 
140 static __inline__ void node_free(struct fib6_node * fn)
141 {
142 	kmem_cache_free(fib6_node_kmem, fn);
143 }
144 
145 static __inline__ void rt6_release(struct rt6_info *rt)
146 {
147 	if (atomic_dec_and_test(&rt->rt6i_ref))
148 		dst_free(&rt->u.dst);
149 }
150 
151 
152 /*
153  *	Routing Table
154  *
155  *	return the appropriate node for a routing tree "add" operation
156  *	by either creating and inserting or by returning an existing
157  *	node.
158  */
159 
160 static struct fib6_node * fib6_add_1(struct fib6_node *root, void *addr,
161 				     int addrlen, int plen,
162 				     int offset)
163 {
164 	struct fib6_node *fn, *in, *ln;
165 	struct fib6_node *pn = NULL;
166 	struct rt6key *key;
167 	int	bit;
168        	int	dir = 0;
169 	__u32	sernum = fib6_new_sernum();
170 
171 	RT6_TRACE("fib6_add_1\n");
172 
173 	/* insert node in tree */
174 
175 	fn = root;
176 
177 	do {
178 		key = (struct rt6key *)((u8 *)fn->leaf + offset);
179 
180 		/*
181 		 *	Prefix match
182 		 */
183 		if (plen < fn->fn_bit ||
184 		    !ipv6_prefix_equal(&key->addr, addr, fn->fn_bit))
185 			goto insert_above;
186 
187 		/*
188 		 *	Exact match ?
189 		 */
190 
191 		if (plen == fn->fn_bit) {
192 			/* clean up an intermediate node */
193 			if ((fn->fn_flags & RTN_RTINFO) == 0) {
194 				rt6_release(fn->leaf);
195 				fn->leaf = NULL;
196 			}
197 
198 			fn->fn_sernum = sernum;
199 
200 			return fn;
201 		}
202 
203 		/*
204 		 *	We have more bits to go
205 		 */
206 
207 		/* Try to walk down on tree. */
208 		fn->fn_sernum = sernum;
209 		dir = addr_bit_set(addr, fn->fn_bit);
210 		pn = fn;
211 		fn = dir ? fn->right: fn->left;
212 	} while (fn);
213 
214 	/*
215 	 *	We walked to the bottom of tree.
216 	 *	Create new leaf node without children.
217 	 */
218 
219 	ln = node_alloc();
220 
221 	if (ln == NULL)
222 		return NULL;
223 	ln->fn_bit = plen;
224 
225 	ln->parent = pn;
226 	ln->fn_sernum = sernum;
227 
228 	if (dir)
229 		pn->right = ln;
230 	else
231 		pn->left  = ln;
232 
233 	return ln;
234 
235 
236 insert_above:
237 	/*
238 	 * split since we don't have a common prefix anymore or
239 	 * we have a less significant route.
240 	 * we've to insert an intermediate node on the list
241 	 * this new node will point to the one we need to create
242 	 * and the current
243 	 */
244 
245 	pn = fn->parent;
246 
247 	/* find 1st bit in difference between the 2 addrs.
248 
249 	   See comment in __ipv6_addr_diff: bit may be an invalid value,
250 	   but if it is >= plen, the value is ignored in any case.
251 	 */
252 
253 	bit = __ipv6_addr_diff(addr, &key->addr, addrlen);
254 
255 	/*
256 	 *		(intermediate)[in]
257 	 *	          /	   \
258 	 *	(new leaf node)[ln] (old node)[fn]
259 	 */
260 	if (plen > bit) {
261 		in = node_alloc();
262 		ln = node_alloc();
263 
264 		if (in == NULL || ln == NULL) {
265 			if (in)
266 				node_free(in);
267 			if (ln)
268 				node_free(ln);
269 			return NULL;
270 		}
271 
272 		/*
273 		 * new intermediate node.
274 		 * RTN_RTINFO will
275 		 * be off since that an address that chooses one of
276 		 * the branches would not match less specific routes
277 		 * in the other branch
278 		 */
279 
280 		in->fn_bit = bit;
281 
282 		in->parent = pn;
283 		in->leaf = fn->leaf;
284 		atomic_inc(&in->leaf->rt6i_ref);
285 
286 		in->fn_sernum = sernum;
287 
288 		/* update parent pointer */
289 		if (dir)
290 			pn->right = in;
291 		else
292 			pn->left  = in;
293 
294 		ln->fn_bit = plen;
295 
296 		ln->parent = in;
297 		fn->parent = in;
298 
299 		ln->fn_sernum = sernum;
300 
301 		if (addr_bit_set(addr, bit)) {
302 			in->right = ln;
303 			in->left  = fn;
304 		} else {
305 			in->left  = ln;
306 			in->right = fn;
307 		}
308 	} else { /* plen <= bit */
309 
310 		/*
311 		 *		(new leaf node)[ln]
312 		 *	          /	   \
313 		 *	     (old node)[fn] NULL
314 		 */
315 
316 		ln = node_alloc();
317 
318 		if (ln == NULL)
319 			return NULL;
320 
321 		ln->fn_bit = plen;
322 
323 		ln->parent = pn;
324 
325 		ln->fn_sernum = sernum;
326 
327 		if (dir)
328 			pn->right = ln;
329 		else
330 			pn->left  = ln;
331 
332 		if (addr_bit_set(&key->addr, plen))
333 			ln->right = fn;
334 		else
335 			ln->left  = fn;
336 
337 		fn->parent = ln;
338 	}
339 	return ln;
340 }
341 
342 /*
343  *	Insert routing information in a node.
344  */
345 
346 static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt,
347 		struct nlmsghdr *nlh,  struct netlink_skb_parms *req)
348 {
349 	struct rt6_info *iter = NULL;
350 	struct rt6_info **ins;
351 
352 	ins = &fn->leaf;
353 
354 	if (fn->fn_flags&RTN_TL_ROOT &&
355 	    fn->leaf == &ip6_null_entry &&
356 	    !(rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) ){
357 		fn->leaf = rt;
358 		rt->u.next = NULL;
359 		goto out;
360 	}
361 
362 	for (iter = fn->leaf; iter; iter=iter->u.next) {
363 		/*
364 		 *	Search for duplicates
365 		 */
366 
367 		if (iter->rt6i_metric == rt->rt6i_metric) {
368 			/*
369 			 *	Same priority level
370 			 */
371 
372 			if (iter->rt6i_dev == rt->rt6i_dev &&
373 			    iter->rt6i_idev == rt->rt6i_idev &&
374 			    ipv6_addr_equal(&iter->rt6i_gateway,
375 					    &rt->rt6i_gateway)) {
376 				if (!(iter->rt6i_flags&RTF_EXPIRES))
377 					return -EEXIST;
378 				iter->rt6i_expires = rt->rt6i_expires;
379 				if (!(rt->rt6i_flags&RTF_EXPIRES)) {
380 					iter->rt6i_flags &= ~RTF_EXPIRES;
381 					iter->rt6i_expires = 0;
382 				}
383 				return -EEXIST;
384 			}
385 		}
386 
387 		if (iter->rt6i_metric > rt->rt6i_metric)
388 			break;
389 
390 		ins = &iter->u.next;
391 	}
392 
393 	/*
394 	 *	insert node
395 	 */
396 
397 out:
398 	rt->u.next = iter;
399 	*ins = rt;
400 	rt->rt6i_node = fn;
401 	atomic_inc(&rt->rt6i_ref);
402 	inet6_rt_notify(RTM_NEWROUTE, rt, nlh, req);
403 	rt6_stats.fib_rt_entries++;
404 
405 	if ((fn->fn_flags & RTN_RTINFO) == 0) {
406 		rt6_stats.fib_route_nodes++;
407 		fn->fn_flags |= RTN_RTINFO;
408 	}
409 
410 	return 0;
411 }
412 
413 static __inline__ void fib6_start_gc(struct rt6_info *rt)
414 {
415 	if (ip6_fib_timer.expires == 0 &&
416 	    (rt->rt6i_flags & (RTF_EXPIRES|RTF_CACHE)))
417 		mod_timer(&ip6_fib_timer, jiffies + ip6_rt_gc_interval);
418 }
419 
420 void fib6_force_start_gc(void)
421 {
422 	if (ip6_fib_timer.expires == 0)
423 		mod_timer(&ip6_fib_timer, jiffies + ip6_rt_gc_interval);
424 }
425 
426 /*
427  *	Add routing information to the routing tree.
428  *	<destination addr>/<source addr>
429  *	with source addr info in sub-trees
430  */
431 
432 int fib6_add(struct fib6_node *root, struct rt6_info *rt,
433 		struct nlmsghdr *nlh, void *_rtattr, struct netlink_skb_parms *req)
434 {
435 	struct fib6_node *fn;
436 	int err = -ENOMEM;
437 
438 	fn = fib6_add_1(root, &rt->rt6i_dst.addr, sizeof(struct in6_addr),
439 			rt->rt6i_dst.plen, offsetof(struct rt6_info, rt6i_dst));
440 
441 	if (fn == NULL)
442 		goto out;
443 
444 #ifdef CONFIG_IPV6_SUBTREES
445 	if (rt->rt6i_src.plen) {
446 		struct fib6_node *sn;
447 
448 		if (fn->subtree == NULL) {
449 			struct fib6_node *sfn;
450 
451 			/*
452 			 * Create subtree.
453 			 *
454 			 *		fn[main tree]
455 			 *		|
456 			 *		sfn[subtree root]
457 			 *		   \
458 			 *		    sn[new leaf node]
459 			 */
460 
461 			/* Create subtree root node */
462 			sfn = node_alloc();
463 			if (sfn == NULL)
464 				goto st_failure;
465 
466 			sfn->leaf = &ip6_null_entry;
467 			atomic_inc(&ip6_null_entry.rt6i_ref);
468 			sfn->fn_flags = RTN_ROOT;
469 			sfn->fn_sernum = fib6_new_sernum();
470 
471 			/* Now add the first leaf node to new subtree */
472 
473 			sn = fib6_add_1(sfn, &rt->rt6i_src.addr,
474 					sizeof(struct in6_addr), rt->rt6i_src.plen,
475 					offsetof(struct rt6_info, rt6i_src));
476 
477 			if (sn == NULL) {
478 				/* If it is failed, discard just allocated
479 				   root, and then (in st_failure) stale node
480 				   in main tree.
481 				 */
482 				node_free(sfn);
483 				goto st_failure;
484 			}
485 
486 			/* Now link new subtree to main tree */
487 			sfn->parent = fn;
488 			fn->subtree = sfn;
489 			if (fn->leaf == NULL) {
490 				fn->leaf = rt;
491 				atomic_inc(&rt->rt6i_ref);
492 			}
493 		} else {
494 			sn = fib6_add_1(fn->subtree, &rt->rt6i_src.addr,
495 					sizeof(struct in6_addr), rt->rt6i_src.plen,
496 					offsetof(struct rt6_info, rt6i_src));
497 
498 			if (sn == NULL)
499 				goto st_failure;
500 		}
501 
502 		fn = sn;
503 	}
504 #endif
505 
506 	err = fib6_add_rt2node(fn, rt, nlh, req);
507 
508 	if (err == 0) {
509 		fib6_start_gc(rt);
510 		if (!(rt->rt6i_flags&RTF_CACHE))
511 			fib6_prune_clones(fn, rt);
512 	}
513 
514 out:
515 	if (err)
516 		dst_free(&rt->u.dst);
517 	return err;
518 
519 #ifdef CONFIG_IPV6_SUBTREES
520 	/* Subtree creation failed, probably main tree node
521 	   is orphan. If it is, shoot it.
522 	 */
523 st_failure:
524 	if (fn && !(fn->fn_flags & (RTN_RTINFO|RTN_ROOT)))
525 		fib6_repair_tree(fn);
526 	dst_free(&rt->u.dst);
527 	return err;
528 #endif
529 }
530 
531 /*
532  *	Routing tree lookup
533  *
534  */
535 
536 struct lookup_args {
537 	int		offset;		/* key offset on rt6_info	*/
538 	struct in6_addr	*addr;		/* search key			*/
539 };
540 
541 static struct fib6_node * fib6_lookup_1(struct fib6_node *root,
542 					struct lookup_args *args)
543 {
544 	struct fib6_node *fn;
545 	int dir;
546 
547 	/*
548 	 *	Descend on a tree
549 	 */
550 
551 	fn = root;
552 
553 	for (;;) {
554 		struct fib6_node *next;
555 
556 		dir = addr_bit_set(args->addr, fn->fn_bit);
557 
558 		next = dir ? fn->right : fn->left;
559 
560 		if (next) {
561 			fn = next;
562 			continue;
563 		}
564 
565 		break;
566 	}
567 
568 	while ((fn->fn_flags & RTN_ROOT) == 0) {
569 #ifdef CONFIG_IPV6_SUBTREES
570 		if (fn->subtree) {
571 			struct fib6_node *st;
572 			struct lookup_args *narg;
573 
574 			narg = args + 1;
575 
576 			if (narg->addr) {
577 				st = fib6_lookup_1(fn->subtree, narg);
578 
579 				if (st && !(st->fn_flags & RTN_ROOT))
580 					return st;
581 			}
582 		}
583 #endif
584 
585 		if (fn->fn_flags & RTN_RTINFO) {
586 			struct rt6key *key;
587 
588 			key = (struct rt6key *) ((u8 *) fn->leaf +
589 						 args->offset);
590 
591 			if (ipv6_prefix_equal(&key->addr, args->addr, key->plen))
592 				return fn;
593 		}
594 
595 		fn = fn->parent;
596 	}
597 
598 	return NULL;
599 }
600 
601 struct fib6_node * fib6_lookup(struct fib6_node *root, struct in6_addr *daddr,
602 			       struct in6_addr *saddr)
603 {
604 	struct lookup_args args[2];
605 	struct fib6_node *fn;
606 
607 	args[0].offset = offsetof(struct rt6_info, rt6i_dst);
608 	args[0].addr = daddr;
609 
610 #ifdef CONFIG_IPV6_SUBTREES
611 	args[1].offset = offsetof(struct rt6_info, rt6i_src);
612 	args[1].addr = saddr;
613 #endif
614 
615 	fn = fib6_lookup_1(root, args);
616 
617 	if (fn == NULL || fn->fn_flags & RTN_TL_ROOT)
618 		fn = root;
619 
620 	return fn;
621 }
622 
623 /*
624  *	Get node with specified destination prefix (and source prefix,
625  *	if subtrees are used)
626  */
627 
628 
629 static struct fib6_node * fib6_locate_1(struct fib6_node *root,
630 					struct in6_addr *addr,
631 					int plen, int offset)
632 {
633 	struct fib6_node *fn;
634 
635 	for (fn = root; fn ; ) {
636 		struct rt6key *key = (struct rt6key *)((u8 *)fn->leaf + offset);
637 
638 		/*
639 		 *	Prefix match
640 		 */
641 		if (plen < fn->fn_bit ||
642 		    !ipv6_prefix_equal(&key->addr, addr, fn->fn_bit))
643 			return NULL;
644 
645 		if (plen == fn->fn_bit)
646 			return fn;
647 
648 		/*
649 		 *	We have more bits to go
650 		 */
651 		if (addr_bit_set(addr, fn->fn_bit))
652 			fn = fn->right;
653 		else
654 			fn = fn->left;
655 	}
656 	return NULL;
657 }
658 
659 struct fib6_node * fib6_locate(struct fib6_node *root,
660 			       struct in6_addr *daddr, int dst_len,
661 			       struct in6_addr *saddr, int src_len)
662 {
663 	struct fib6_node *fn;
664 
665 	fn = fib6_locate_1(root, daddr, dst_len,
666 			   offsetof(struct rt6_info, rt6i_dst));
667 
668 #ifdef CONFIG_IPV6_SUBTREES
669 	if (src_len) {
670 		BUG_TRAP(saddr!=NULL);
671 		if (fn == NULL)
672 			fn = fn->subtree;
673 		if (fn)
674 			fn = fib6_locate_1(fn, saddr, src_len,
675 					   offsetof(struct rt6_info, rt6i_src));
676 	}
677 #endif
678 
679 	if (fn && fn->fn_flags&RTN_RTINFO)
680 		return fn;
681 
682 	return NULL;
683 }
684 
685 
686 /*
687  *	Deletion
688  *
689  */
690 
691 static struct rt6_info * fib6_find_prefix(struct fib6_node *fn)
692 {
693 	if (fn->fn_flags&RTN_ROOT)
694 		return &ip6_null_entry;
695 
696 	while(fn) {
697 		if(fn->left)
698 			return fn->left->leaf;
699 
700 		if(fn->right)
701 			return fn->right->leaf;
702 
703 		fn = SUBTREE(fn);
704 	}
705 	return NULL;
706 }
707 
708 /*
709  *	Called to trim the tree of intermediate nodes when possible. "fn"
710  *	is the node we want to try and remove.
711  */
712 
713 static struct fib6_node * fib6_repair_tree(struct fib6_node *fn)
714 {
715 	int children;
716 	int nstate;
717 	struct fib6_node *child, *pn;
718 	struct fib6_walker_t *w;
719 	int iter = 0;
720 
721 	for (;;) {
722 		RT6_TRACE("fixing tree: plen=%d iter=%d\n", fn->fn_bit, iter);
723 		iter++;
724 
725 		BUG_TRAP(!(fn->fn_flags&RTN_RTINFO));
726 		BUG_TRAP(!(fn->fn_flags&RTN_TL_ROOT));
727 		BUG_TRAP(fn->leaf==NULL);
728 
729 		children = 0;
730 		child = NULL;
731 		if (fn->right) child = fn->right, children |= 1;
732 		if (fn->left) child = fn->left, children |= 2;
733 
734 		if (children == 3 || SUBTREE(fn)
735 #ifdef CONFIG_IPV6_SUBTREES
736 		    /* Subtree root (i.e. fn) may have one child */
737 		    || (children && fn->fn_flags&RTN_ROOT)
738 #endif
739 		    ) {
740 			fn->leaf = fib6_find_prefix(fn);
741 #if RT6_DEBUG >= 2
742 			if (fn->leaf==NULL) {
743 				BUG_TRAP(fn->leaf);
744 				fn->leaf = &ip6_null_entry;
745 			}
746 #endif
747 			atomic_inc(&fn->leaf->rt6i_ref);
748 			return fn->parent;
749 		}
750 
751 		pn = fn->parent;
752 #ifdef CONFIG_IPV6_SUBTREES
753 		if (SUBTREE(pn) == fn) {
754 			BUG_TRAP(fn->fn_flags&RTN_ROOT);
755 			SUBTREE(pn) = NULL;
756 			nstate = FWS_L;
757 		} else {
758 			BUG_TRAP(!(fn->fn_flags&RTN_ROOT));
759 #endif
760 			if (pn->right == fn) pn->right = child;
761 			else if (pn->left == fn) pn->left = child;
762 #if RT6_DEBUG >= 2
763 			else BUG_TRAP(0);
764 #endif
765 			if (child)
766 				child->parent = pn;
767 			nstate = FWS_R;
768 #ifdef CONFIG_IPV6_SUBTREES
769 		}
770 #endif
771 
772 		read_lock(&fib6_walker_lock);
773 		FOR_WALKERS(w) {
774 			if (child == NULL) {
775 				if (w->root == fn) {
776 					w->root = w->node = NULL;
777 					RT6_TRACE("W %p adjusted by delroot 1\n", w);
778 				} else if (w->node == fn) {
779 					RT6_TRACE("W %p adjusted by delnode 1, s=%d/%d\n", w, w->state, nstate);
780 					w->node = pn;
781 					w->state = nstate;
782 				}
783 			} else {
784 				if (w->root == fn) {
785 					w->root = child;
786 					RT6_TRACE("W %p adjusted by delroot 2\n", w);
787 				}
788 				if (w->node == fn) {
789 					w->node = child;
790 					if (children&2) {
791 						RT6_TRACE("W %p adjusted by delnode 2, s=%d\n", w, w->state);
792 						w->state = w->state>=FWS_R ? FWS_U : FWS_INIT;
793 					} else {
794 						RT6_TRACE("W %p adjusted by delnode 2, s=%d\n", w, w->state);
795 						w->state = w->state>=FWS_C ? FWS_U : FWS_INIT;
796 					}
797 				}
798 			}
799 		}
800 		read_unlock(&fib6_walker_lock);
801 
802 		node_free(fn);
803 		if (pn->fn_flags&RTN_RTINFO || SUBTREE(pn))
804 			return pn;
805 
806 		rt6_release(pn->leaf);
807 		pn->leaf = NULL;
808 		fn = pn;
809 	}
810 }
811 
812 static void fib6_del_route(struct fib6_node *fn, struct rt6_info **rtp,
813     struct nlmsghdr *nlh, void *_rtattr, struct netlink_skb_parms *req)
814 {
815 	struct fib6_walker_t *w;
816 	struct rt6_info *rt = *rtp;
817 
818 	RT6_TRACE("fib6_del_route\n");
819 
820 	/* Unlink it */
821 	*rtp = rt->u.next;
822 	rt->rt6i_node = NULL;
823 	rt6_stats.fib_rt_entries--;
824 	rt6_stats.fib_discarded_routes++;
825 
826 	/* Adjust walkers */
827 	read_lock(&fib6_walker_lock);
828 	FOR_WALKERS(w) {
829 		if (w->state == FWS_C && w->leaf == rt) {
830 			RT6_TRACE("walker %p adjusted by delroute\n", w);
831 			w->leaf = rt->u.next;
832 			if (w->leaf == NULL)
833 				w->state = FWS_U;
834 		}
835 	}
836 	read_unlock(&fib6_walker_lock);
837 
838 	rt->u.next = NULL;
839 
840 	if (fn->leaf == NULL && fn->fn_flags&RTN_TL_ROOT)
841 		fn->leaf = &ip6_null_entry;
842 
843 	/* If it was last route, expunge its radix tree node */
844 	if (fn->leaf == NULL) {
845 		fn->fn_flags &= ~RTN_RTINFO;
846 		rt6_stats.fib_route_nodes--;
847 		fn = fib6_repair_tree(fn);
848 	}
849 
850 	if (atomic_read(&rt->rt6i_ref) != 1) {
851 		/* This route is used as dummy address holder in some split
852 		 * nodes. It is not leaked, but it still holds other resources,
853 		 * which must be released in time. So, scan ascendant nodes
854 		 * and replace dummy references to this route with references
855 		 * to still alive ones.
856 		 */
857 		while (fn) {
858 			if (!(fn->fn_flags&RTN_RTINFO) && fn->leaf == rt) {
859 				fn->leaf = fib6_find_prefix(fn);
860 				atomic_inc(&fn->leaf->rt6i_ref);
861 				rt6_release(rt);
862 			}
863 			fn = fn->parent;
864 		}
865 		/* No more references are possible at this point. */
866 		if (atomic_read(&rt->rt6i_ref) != 1) BUG();
867 	}
868 
869 	inet6_rt_notify(RTM_DELROUTE, rt, nlh, req);
870 	rt6_release(rt);
871 }
872 
873 int fib6_del(struct rt6_info *rt, struct nlmsghdr *nlh, void *_rtattr, struct netlink_skb_parms *req)
874 {
875 	struct fib6_node *fn = rt->rt6i_node;
876 	struct rt6_info **rtp;
877 
878 #if RT6_DEBUG >= 2
879 	if (rt->u.dst.obsolete>0) {
880 		BUG_TRAP(fn==NULL);
881 		return -ENOENT;
882 	}
883 #endif
884 	if (fn == NULL || rt == &ip6_null_entry)
885 		return -ENOENT;
886 
887 	BUG_TRAP(fn->fn_flags&RTN_RTINFO);
888 
889 	if (!(rt->rt6i_flags&RTF_CACHE))
890 		fib6_prune_clones(fn, rt);
891 
892 	/*
893 	 *	Walk the leaf entries looking for ourself
894 	 */
895 
896 	for (rtp = &fn->leaf; *rtp; rtp = &(*rtp)->u.next) {
897 		if (*rtp == rt) {
898 			fib6_del_route(fn, rtp, nlh, _rtattr, req);
899 			return 0;
900 		}
901 	}
902 	return -ENOENT;
903 }
904 
905 /*
906  *	Tree traversal function.
907  *
908  *	Certainly, it is not interrupt safe.
909  *	However, it is internally reenterable wrt itself and fib6_add/fib6_del.
910  *	It means, that we can modify tree during walking
911  *	and use this function for garbage collection, clone pruning,
912  *	cleaning tree when a device goes down etc. etc.
913  *
914  *	It guarantees that every node will be traversed,
915  *	and that it will be traversed only once.
916  *
917  *	Callback function w->func may return:
918  *	0 -> continue walking.
919  *	positive value -> walking is suspended (used by tree dumps,
920  *	and probably by gc, if it will be split to several slices)
921  *	negative value -> terminate walking.
922  *
923  *	The function itself returns:
924  *	0   -> walk is complete.
925  *	>0  -> walk is incomplete (i.e. suspended)
926  *	<0  -> walk is terminated by an error.
927  */
928 
929 int fib6_walk_continue(struct fib6_walker_t *w)
930 {
931 	struct fib6_node *fn, *pn;
932 
933 	for (;;) {
934 		fn = w->node;
935 		if (fn == NULL)
936 			return 0;
937 
938 		if (w->prune && fn != w->root &&
939 		    fn->fn_flags&RTN_RTINFO && w->state < FWS_C) {
940 			w->state = FWS_C;
941 			w->leaf = fn->leaf;
942 		}
943 		switch (w->state) {
944 #ifdef CONFIG_IPV6_SUBTREES
945 		case FWS_S:
946 			if (SUBTREE(fn)) {
947 				w->node = SUBTREE(fn);
948 				continue;
949 			}
950 			w->state = FWS_L;
951 #endif
952 		case FWS_L:
953 			if (fn->left) {
954 				w->node = fn->left;
955 				w->state = FWS_INIT;
956 				continue;
957 			}
958 			w->state = FWS_R;
959 		case FWS_R:
960 			if (fn->right) {
961 				w->node = fn->right;
962 				w->state = FWS_INIT;
963 				continue;
964 			}
965 			w->state = FWS_C;
966 			w->leaf = fn->leaf;
967 		case FWS_C:
968 			if (w->leaf && fn->fn_flags&RTN_RTINFO) {
969 				int err = w->func(w);
970 				if (err)
971 					return err;
972 				continue;
973 			}
974 			w->state = FWS_U;
975 		case FWS_U:
976 			if (fn == w->root)
977 				return 0;
978 			pn = fn->parent;
979 			w->node = pn;
980 #ifdef CONFIG_IPV6_SUBTREES
981 			if (SUBTREE(pn) == fn) {
982 				BUG_TRAP(fn->fn_flags&RTN_ROOT);
983 				w->state = FWS_L;
984 				continue;
985 			}
986 #endif
987 			if (pn->left == fn) {
988 				w->state = FWS_R;
989 				continue;
990 			}
991 			if (pn->right == fn) {
992 				w->state = FWS_C;
993 				w->leaf = w->node->leaf;
994 				continue;
995 			}
996 #if RT6_DEBUG >= 2
997 			BUG_TRAP(0);
998 #endif
999 		}
1000 	}
1001 }
1002 
1003 int fib6_walk(struct fib6_walker_t *w)
1004 {
1005 	int res;
1006 
1007 	w->state = FWS_INIT;
1008 	w->node = w->root;
1009 
1010 	fib6_walker_link(w);
1011 	res = fib6_walk_continue(w);
1012 	if (res <= 0)
1013 		fib6_walker_unlink(w);
1014 	return res;
1015 }
1016 
1017 static int fib6_clean_node(struct fib6_walker_t *w)
1018 {
1019 	int res;
1020 	struct rt6_info *rt;
1021 	struct fib6_cleaner_t *c = (struct fib6_cleaner_t*)w;
1022 
1023 	for (rt = w->leaf; rt; rt = rt->u.next) {
1024 		res = c->func(rt, c->arg);
1025 		if (res < 0) {
1026 			w->leaf = rt;
1027 			res = fib6_del(rt, NULL, NULL, NULL);
1028 			if (res) {
1029 #if RT6_DEBUG >= 2
1030 				printk(KERN_DEBUG "fib6_clean_node: del failed: rt=%p@%p err=%d\n", rt, rt->rt6i_node, res);
1031 #endif
1032 				continue;
1033 			}
1034 			return 0;
1035 		}
1036 		BUG_TRAP(res==0);
1037 	}
1038 	w->leaf = rt;
1039 	return 0;
1040 }
1041 
1042 /*
1043  *	Convenient frontend to tree walker.
1044  *
1045  *	func is called on each route.
1046  *		It may return -1 -> delete this route.
1047  *		              0  -> continue walking
1048  *
1049  *	prune==1 -> only immediate children of node (certainly,
1050  *	ignoring pure split nodes) will be scanned.
1051  */
1052 
1053 void fib6_clean_tree(struct fib6_node *root,
1054 		     int (*func)(struct rt6_info *, void *arg),
1055 		     int prune, void *arg)
1056 {
1057 	struct fib6_cleaner_t c;
1058 
1059 	c.w.root = root;
1060 	c.w.func = fib6_clean_node;
1061 	c.w.prune = prune;
1062 	c.func = func;
1063 	c.arg = arg;
1064 
1065 	fib6_walk(&c.w);
1066 }
1067 
1068 static int fib6_prune_clone(struct rt6_info *rt, void *arg)
1069 {
1070 	if (rt->rt6i_flags & RTF_CACHE) {
1071 		RT6_TRACE("pruning clone %p\n", rt);
1072 		return -1;
1073 	}
1074 
1075 	return 0;
1076 }
1077 
1078 static void fib6_prune_clones(struct fib6_node *fn, struct rt6_info *rt)
1079 {
1080 	fib6_clean_tree(fn, fib6_prune_clone, 1, rt);
1081 }
1082 
1083 /*
1084  *	Garbage collection
1085  */
1086 
1087 static struct fib6_gc_args
1088 {
1089 	int			timeout;
1090 	int			more;
1091 } gc_args;
1092 
1093 static int fib6_age(struct rt6_info *rt, void *arg)
1094 {
1095 	unsigned long now = jiffies;
1096 
1097 	/*
1098 	 *	check addrconf expiration here.
1099 	 *	Routes are expired even if they are in use.
1100 	 *
1101 	 *	Also age clones. Note, that clones are aged out
1102 	 *	only if they are not in use now.
1103 	 */
1104 
1105 	if (rt->rt6i_flags&RTF_EXPIRES && rt->rt6i_expires) {
1106 		if (time_after(now, rt->rt6i_expires)) {
1107 			RT6_TRACE("expiring %p\n", rt);
1108 			return -1;
1109 		}
1110 		gc_args.more++;
1111 	} else if (rt->rt6i_flags & RTF_CACHE) {
1112 		if (atomic_read(&rt->u.dst.__refcnt) == 0 &&
1113 		    time_after_eq(now, rt->u.dst.lastuse + gc_args.timeout)) {
1114 			RT6_TRACE("aging clone %p\n", rt);
1115 			return -1;
1116 		} else if ((rt->rt6i_flags & RTF_GATEWAY) &&
1117 			   (!(rt->rt6i_nexthop->flags & NTF_ROUTER))) {
1118 			RT6_TRACE("purging route %p via non-router but gateway\n",
1119 				  rt);
1120 			return -1;
1121 		}
1122 		gc_args.more++;
1123 	}
1124 
1125 	return 0;
1126 }
1127 
1128 static DEFINE_SPINLOCK(fib6_gc_lock);
1129 
1130 void fib6_run_gc(unsigned long dummy)
1131 {
1132 	if (dummy != ~0UL) {
1133 		spin_lock_bh(&fib6_gc_lock);
1134 		gc_args.timeout = dummy ? (int)dummy : ip6_rt_gc_interval;
1135 	} else {
1136 		local_bh_disable();
1137 		if (!spin_trylock(&fib6_gc_lock)) {
1138 			mod_timer(&ip6_fib_timer, jiffies + HZ);
1139 			local_bh_enable();
1140 			return;
1141 		}
1142 		gc_args.timeout = ip6_rt_gc_interval;
1143 	}
1144 	gc_args.more = 0;
1145 
1146 
1147 	write_lock_bh(&rt6_lock);
1148 	ndisc_dst_gc(&gc_args.more);
1149 	fib6_clean_tree(&ip6_routing_table, fib6_age, 0, NULL);
1150 	write_unlock_bh(&rt6_lock);
1151 
1152 	if (gc_args.more)
1153 		mod_timer(&ip6_fib_timer, jiffies + ip6_rt_gc_interval);
1154 	else {
1155 		del_timer(&ip6_fib_timer);
1156 		ip6_fib_timer.expires = 0;
1157 	}
1158 	spin_unlock_bh(&fib6_gc_lock);
1159 }
1160 
1161 void __init fib6_init(void)
1162 {
1163 	fib6_node_kmem = kmem_cache_create("fib6_nodes",
1164 					   sizeof(struct fib6_node),
1165 					   0, SLAB_HWCACHE_ALIGN,
1166 					   NULL, NULL);
1167 	if (!fib6_node_kmem)
1168 		panic("cannot create fib6_nodes cache");
1169 }
1170 
1171 void fib6_gc_cleanup(void)
1172 {
1173 	del_timer(&ip6_fib_timer);
1174 	kmem_cache_destroy(fib6_node_kmem);
1175 }
1176