xref: /linux/net/ipv6/route.c (revision 9ce7677cfd7cd871adb457c80bea3b581b839641)
1 /*
2  *	Linux INET6 implementation
3  *	FIB front-end.
4  *
5  *	Authors:
6  *	Pedro Roque		<roque@di.fc.ul.pt>
7  *
8  *	$Id: route.c,v 1.56 2001/10/31 21:55:55 davem Exp $
9  *
10  *	This program is free software; you can redistribute it and/or
11  *      modify it under the terms of the GNU General Public License
12  *      as published by the Free Software Foundation; either version
13  *      2 of the License, or (at your option) any later version.
14  */
15 
16 /*	Changes:
17  *
18  *	YOSHIFUJI Hideaki @USAGI
19  *		reworked default router selection.
20  *		- respect outgoing interface
21  *		- select from (probably) reachable routers (i.e.
22  *		routers in REACHABLE, STALE, DELAY or PROBE states).
23  *		- always select the same router if it is (probably)
24  *		reachable.  otherwise, round-robin the list.
25  */
26 
27 #include <linux/config.h>
28 #include <linux/errno.h>
29 #include <linux/types.h>
30 #include <linux/times.h>
31 #include <linux/socket.h>
32 #include <linux/sockios.h>
33 #include <linux/net.h>
34 #include <linux/route.h>
35 #include <linux/netdevice.h>
36 #include <linux/in6.h>
37 #include <linux/init.h>
38 #include <linux/netlink.h>
39 #include <linux/if_arp.h>
40 
41 #ifdef 	CONFIG_PROC_FS
42 #include <linux/proc_fs.h>
43 #include <linux/seq_file.h>
44 #endif
45 
46 #include <net/snmp.h>
47 #include <net/ipv6.h>
48 #include <net/ip6_fib.h>
49 #include <net/ip6_route.h>
50 #include <net/ndisc.h>
51 #include <net/addrconf.h>
52 #include <net/tcp.h>
53 #include <linux/rtnetlink.h>
54 #include <net/dst.h>
55 #include <net/xfrm.h>
56 
57 #include <asm/uaccess.h>
58 
59 #ifdef CONFIG_SYSCTL
60 #include <linux/sysctl.h>
61 #endif
62 
/* Set to 3 to get tracing. */
#define RT6_DEBUG 2

/* Debug helpers: compiled away entirely unless RT6_DEBUG >= 3. */
#if RT6_DEBUG >= 3
#define RDBG(x) printk x
#define RT6_TRACE(x...) printk(KERN_DEBUG x)
#else
#define RDBG(x)
#define RT6_TRACE(x...) do { ; } while (0)
#endif
73 
74 
/* Routing-cache tunables.  ip6_rt_max_size and ip6_rt_gc_min_interval
 * drive ip6_dst_gc() below; ip6_rt_mtu_expires bounds the lifetime of
 * PMTU-lowered cache clones; ip6_rt_min_advmss floors the advertised
 * MSS computed in ipv6_advmss().  (Presumably also exposed via sysctl
 * elsewhere — not visible in this chunk.)
 */
static int ip6_rt_max_size = 4096;
static int ip6_rt_gc_min_interval = HZ / 2;
static int ip6_rt_gc_timeout = 60*HZ;
int ip6_rt_gc_interval = 30*HZ;
static int ip6_rt_gc_elasticity = 9;
static int ip6_rt_mtu_expires = 10*60*HZ;
static int ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;

/* Forward declarations for the dst_ops callbacks and packet handlers. */
static struct rt6_info * ip6_rt_copy(struct rt6_info *ort);
static struct dst_entry	*ip6_dst_check(struct dst_entry *dst, u32 cookie);
static struct dst_entry *ip6_negative_advice(struct dst_entry *);
static void		ip6_dst_destroy(struct dst_entry *);
static void		ip6_dst_ifdown(struct dst_entry *,
				       struct net_device *dev, int how);
static int		 ip6_dst_gc(void);

static int		ip6_pkt_discard(struct sk_buff *skb);
static int		ip6_pkt_discard_out(struct sk_buff *skb);
static void		ip6_link_failure(struct sk_buff *skb);
static void		ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
95 
/* dst_ops vtable wiring IPv6 routes into the generic dst cache. */
static struct dst_ops ip6_dst_ops = {
	.family			=	AF_INET6,
	.protocol		=	__constant_htons(ETH_P_IPV6),
	.gc			=	ip6_dst_gc,
	.gc_thresh		=	1024,
	.check			=	ip6_dst_check,
	.destroy		=	ip6_dst_destroy,
	.ifdown			=	ip6_dst_ifdown,
	.negative_advice	=	ip6_negative_advice,
	.link_failure		=	ip6_link_failure,
	.update_pmtu		=	ip6_rt_update_pmtu,
	.entry_size		=	sizeof(struct rt6_info),
};
109 
/* Sentinel route returned when a lookup fails: every packet through it
 * is discarded with -ENETUNREACH.  It is its own dst path and starts
 * with a static reference so it is never freed.
 */
struct rt6_info ip6_null_entry = {
	.u = {
		.dst = {
			.__refcnt	= ATOMIC_INIT(1),
			.__use		= 1,
			.dev		= &loopback_dev,
			.obsolete	= -1,
			.error		= -ENETUNREACH,
			.metrics	= { [RTAX_HOPLIMIT - 1] = 255, },
			.input		= ip6_pkt_discard,
			.output		= ip6_pkt_discard_out,
			.ops		= &ip6_dst_ops,
			.path		= (struct dst_entry*)&ip6_null_entry,
		}
	},
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_metric	= ~(u32) 0,
	.rt6i_ref	= ATOMIC_INIT(1),
};
129 
/* Root node of the IPv6 FIB; an empty tree's leaf is the reject route. */
struct fib6_node ip6_routing_table = {
	.leaf		= &ip6_null_entry,
	.fn_flags	= RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO,
};

/* Protects all the ip6 fib */

DEFINE_RWLOCK(rt6_lock);
139 
140 /* allocate dst with ip6_dst_ops */
141 static __inline__ struct rt6_info *ip6_dst_alloc(void)
142 {
143 	return (struct rt6_info *)dst_alloc(&ip6_dst_ops);
144 }
145 
146 static void ip6_dst_destroy(struct dst_entry *dst)
147 {
148 	struct rt6_info *rt = (struct rt6_info *)dst;
149 	struct inet6_dev *idev = rt->rt6i_idev;
150 
151 	if (idev != NULL) {
152 		rt->rt6i_idev = NULL;
153 		in6_dev_put(idev);
154 	}
155 }
156 
/* dst_ops.ifdown hook: the device backing this route is going away.
 * Re-home the route's inet6_dev reference onto loopback so the
 * original device can be released; the reference taken by
 * in6_dev_get() is transferred to rt6i_idev and the old idev
 * reference is dropped.  If loopback has no inet6_dev the route keeps
 * its old reference.
 */
static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
			   int how)
{
	struct rt6_info *rt = (struct rt6_info *)dst;
	struct inet6_dev *idev = rt->rt6i_idev;

	if (dev != &loopback_dev && idev != NULL && idev->dev == dev) {
		struct inet6_dev *loopback_idev = in6_dev_get(&loopback_dev);
		if (loopback_idev != NULL) {
			rt->rt6i_idev = loopback_idev;
			in6_dev_put(idev);
		}
	}
}
171 
172 static __inline__ int rt6_check_expired(const struct rt6_info *rt)
173 {
174 	return (rt->rt6i_flags & RTF_EXPIRES &&
175 		time_after(jiffies, rt->rt6i_expires));
176 }
177 
178 /*
179  *	Route lookup. Any rt6_lock is implied.
180  */
181 
/* Pick from the sibling chain starting at @rt the route matching
 * outgoing interface @oif.  An exact device-index match wins
 * immediately; a loopback route is remembered as a fallback.  With no
 * match: return the fallback if any, ip6_null_entry when @strict, or
 * the chain head otherwise.  When @oif is 0 the head is returned
 * unconditionally.  Caller holds rt6_lock (see file comment above).
 */
static __inline__ struct rt6_info *rt6_device_match(struct rt6_info *rt,
						    int oif,
						    int strict)
{
	struct rt6_info *local = NULL;
	struct rt6_info *sprt;

	if (oif) {
		for (sprt = rt; sprt; sprt = sprt->u.next) {
			struct net_device *dev = sprt->rt6i_dev;
			if (dev->ifindex == oif)
				return sprt;
			if (dev->flags & IFF_LOOPBACK) {
				/* Loopback route whose idev does not match
				 * @oif: only usable as a lax fallback.
				 * (oif is known nonzero in this branch.)
				 */
				if (sprt->rt6i_idev == NULL ||
				    sprt->rt6i_idev->dev->ifindex != oif) {
					if (strict && oif)
						continue;
					if (local && (!oif ||
						      local->rt6i_idev->dev->ifindex == oif))
						continue;
				}
				local = sprt;
			}
		}

		if (local)
			return local;

		if (strict)
			return &ip6_null_entry;
	}
	return rt;
}
215 
/*
 *	pointer to the last default router chosen. BH is disabled locally.
 */
static struct rt6_info *rt6_dflt_pointer;
static DEFINE_SPINLOCK(rt6_dflt_lock);

/* Forget the remembered default router when it is @rt, or
 * unconditionally when @rt is NULL (as done from ip6_del_rt), so the
 * round-robin state never points at a removed entry.
 */
void rt6_reset_dflt_pointer(struct rt6_info *rt)
{
	spin_lock_bh(&rt6_dflt_lock);
	if (rt == NULL || rt == rt6_dflt_pointer) {
		RT6_TRACE("reset default router: %p->NULL\n", rt6_dflt_pointer);
		rt6_dflt_pointer = NULL;
	}
	spin_unlock_bh(&rt6_dflt_lock);
}
231 
/* Default Router Selection (RFC 2461 6.3.6)
 *
 * Score every default route in the chain at @rt: +8 for a matching (or
 * unspecified) @oif, +4 for being the previously chosen router, and
 * +3/+2/+1 by neighbour reachability (REACHABLE / STALE-DELAY-PROBE /
 * NOARP-PERMANENT).  Expired routes and INCOMPLETE neighbours are
 * skipped.  A score of 12 or more ends the scan early.  If nothing
 * scores, round-robin from the last choice; failing that, fall back to
 * any addrconf default route, then to ip6_null_entry.  Called under
 * rt6_lock with BH disabled (see ip6_route_output).
 */
static struct rt6_info *rt6_best_dflt(struct rt6_info *rt, int oif)
{
	struct rt6_info *match = NULL;
	struct rt6_info *sprt;
	int mpri = 0;

	for (sprt = rt; sprt; sprt = sprt->u.next) {
		struct neighbour *neigh;
		int m = 0;

		if (!oif ||
		    (sprt->rt6i_dev &&
		     sprt->rt6i_dev->ifindex == oif))
			m += 8;

		if (rt6_check_expired(sprt))
			continue;

		/* NOTE(review): rt6_dflt_pointer is read here without
		 * rt6_dflt_lock; a stale value only perturbs the score.
		 */
		if (sprt == rt6_dflt_pointer)
			m += 4;

		if ((neigh = sprt->rt6i_nexthop) != NULL) {
			read_lock_bh(&neigh->lock);
			switch (neigh->nud_state) {
			case NUD_REACHABLE:
				m += 3;
				break;

			case NUD_STALE:
			case NUD_DELAY:
			case NUD_PROBE:
				m += 2;
				break;

			case NUD_NOARP:
			case NUD_PERMANENT:
				m += 1;
				break;

			case NUD_INCOMPLETE:
			default:
				read_unlock_bh(&neigh->lock);
				continue;
			}
			read_unlock_bh(&neigh->lock);
		} else {
			continue;
		}

		if (m > mpri || m >= 12) {
			match = sprt;
			mpri = m;
			if (m >= 12) {
				/* we choose the last default router if it
				 * is in (probably) reachable state.
				 * If route changed, we should do pmtu
				 * discovery. --yoshfuji
				 */
				break;
			}
		}
	}

	spin_lock(&rt6_dflt_lock);
	if (!match) {
		/*
		 *	No default routers are known to be reachable.
		 *	SHOULD round robin
		 */
		if (rt6_dflt_pointer) {
			/* First the routers after the previous choice... */
			for (sprt = rt6_dflt_pointer->u.next;
			     sprt; sprt = sprt->u.next) {
				if (sprt->u.dst.obsolete <= 0 &&
				    sprt->u.dst.error == 0 &&
				    !rt6_check_expired(sprt)) {
					match = sprt;
					break;
				}
			}
			/* ...then wrap from the head up to and including
			 * the previous choice itself.
			 */
			for (sprt = rt;
			     !match && sprt;
			     sprt = sprt->u.next) {
				if (sprt->u.dst.obsolete <= 0 &&
				    sprt->u.dst.error == 0 &&
				    !rt6_check_expired(sprt)) {
					match = sprt;
					break;
				}
				if (sprt == rt6_dflt_pointer)
					break;
			}
		}
	}

	if (match) {
		if (rt6_dflt_pointer != match)
			RT6_TRACE("changed default router: %p->%p\n",
				  rt6_dflt_pointer, match);
		rt6_dflt_pointer = match;
	}
	spin_unlock(&rt6_dflt_lock);

	if (!match) {
		/*
		 * Last Resort: if no default routers found,
		 * use addrconf default route.
		 * We don't record this route.
		 */
		for (sprt = ip6_routing_table.leaf;
		     sprt; sprt = sprt->u.next) {
			if (!rt6_check_expired(sprt) &&
			    (sprt->rt6i_flags & RTF_DEFAULT) &&
			    (!oif ||
			     (sprt->rt6i_dev &&
			      sprt->rt6i_dev->ifindex == oif))) {
				match = sprt;
				break;
			}
		}
		if (!match) {
			/* no default route.  give up. */
			match = &ip6_null_entry;
		}
	}

	return match;
}
360 
/* Public route lookup: find the best route to @daddr/@saddr for @oif
 * under rt6_lock, take a reference and bump the usage counter.
 * Returns the held route, or NULL (reference already dropped) when
 * only an error route such as ip6_null_entry was found.
 */
struct rt6_info *rt6_lookup(struct in6_addr *daddr, struct in6_addr *saddr,
			    int oif, int strict)
{
	struct fib6_node *fn;
	struct rt6_info *rt;

	read_lock_bh(&rt6_lock);
	fn = fib6_lookup(&ip6_routing_table, daddr, saddr);
	rt = rt6_device_match(fn->leaf, oif, strict);
	dst_hold(&rt->u.dst);
	rt->u.dst.__use++;
	read_unlock_bh(&rt6_lock);

	rt->u.dst.lastuse = jiffies;
	if (rt->u.dst.error == 0)
		return rt;
	dst_release(&rt->u.dst);
	return NULL;
}
380 
381 /* ip6_ins_rt is called with FREE rt6_lock.
382    It takes new route entry, the addition fails by any reason the
383    route is freed. In any case, if caller does not hold it, it may
384    be destroyed.
385  */
386 
387 int ip6_ins_rt(struct rt6_info *rt, struct nlmsghdr *nlh,
388 		void *_rtattr, struct netlink_skb_parms *req)
389 {
390 	int err;
391 
392 	write_lock_bh(&rt6_lock);
393 	err = fib6_add(&ip6_routing_table, rt, nlh, _rtattr, req);
394 	write_unlock_bh(&rt6_lock);
395 
396 	return err;
397 }
398 
399 /* No rt6_lock! If COW failed, the function returns dead route entry
400    with dst->error set to errno value.
401  */
402 
/* Copy-on-write a /128 host route for @daddr out of the network route
 * @ort, bind a neighbour entry, and insert the clone into the FIB.
 * The returned route is held; on insertion failure it is returned
 * anyway with dst.error set (-EEXIST is the case callers retry on).
 * If the copy itself fails, the held ip6_null_entry is returned.
 */
static struct rt6_info *rt6_cow(struct rt6_info *ort, struct in6_addr *daddr,
				struct in6_addr *saddr, struct netlink_skb_parms *req)
{
	int err;
	struct rt6_info *rt;

	/*
	 *	Clone the route.
	 */

	rt = ip6_rt_copy(ort);

	if (rt) {
		ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);

		/* On a connected route the destination itself is the
		 * next hop.
		 */
		if (!(rt->rt6i_flags&RTF_GATEWAY))
			ipv6_addr_copy(&rt->rt6i_gateway, daddr);

		rt->rt6i_dst.plen = 128;
		rt->rt6i_flags |= RTF_CACHE;
		rt->u.dst.flags |= DST_HOST;

#ifdef CONFIG_IPV6_SUBTREES
		if (rt->rt6i_src.plen && saddr) {
			ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
			rt->rt6i_src.plen = 128;
		}
#endif

		rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);

		/* One reference for the caller; ip6_ins_rt consumes
		 * the route's own on failure paths inside fib6_add.
		 */
		dst_hold(&rt->u.dst);

		err = ip6_ins_rt(rt, NULL, NULL, req);
		if (err == 0)
			return rt;

		rt->u.dst.error = err;

		return rt;
	}
	dst_hold(&ip6_null_entry.u.dst);
	return &ip6_null_entry;
}
447 
/* Backtrack up the fib tree after a strict lookup came back with the
 * reject route.  Relies on locals rt, fn, strict and on the labels
 * out/restart in the enclosing function (ip6_route_input and
 * ip6_route_output).  Hitting the root means definitive failure: hold
 * the null entry and jump to out; any intermediate node that carries
 * route info (RTN_RTINFO) is retried via restart.
 */
#define BACKTRACK() \
if (rt == &ip6_null_entry && strict) { \
       while ((fn = fn->parent) != NULL) { \
		if (fn->fn_flags & RTN_ROOT) { \
			dst_hold(&rt->u.dst); \
			goto out; \
		} \
		if (fn->fn_flags & RTN_RTINFO) \
			goto restart; \
	} \
}
459 
460 
/* Resolve the route for an incoming packet and attach it to skb->dst.
 * The FIB is searched under rt6_lock; if the chosen route is connected
 * but has no neighbour bound yet, a per-destination clone is created
 * with rt6_cow() after dropping the lock.  A concurrent insert of the
 * same clone yields -EEXIST, in which case the whole lookup is retried
 * (at most 3 attempts).
 */
void ip6_route_input(struct sk_buff *skb)
{
	struct fib6_node *fn;
	struct rt6_info *rt;
	int strict;
	int attempts = 3;

	/* Multicast and link-local destinations must respect the
	 * receiving interface.
	 */
	strict = ipv6_addr_type(&skb->nh.ipv6h->daddr) & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL);

relookup:
	read_lock_bh(&rt6_lock);

	fn = fib6_lookup(&ip6_routing_table, &skb->nh.ipv6h->daddr,
			 &skb->nh.ipv6h->saddr);

restart:
	rt = fn->leaf;

	if ((rt->rt6i_flags & RTF_CACHE)) {
		/* Cached clone: a device match (with backtracking on
		 * strict failure) is all that is needed.
		 */
		rt = rt6_device_match(rt, skb->dev->ifindex, strict);
		BACKTRACK();
		dst_hold(&rt->u.dst);
		goto out;
	}

	rt = rt6_device_match(rt, skb->dev->ifindex, strict);
	BACKTRACK();

	if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) {
		struct rt6_info *nrt;
		/* Connected route without a neighbour: clone a host
		 * route outside the lock.
		 */
		dst_hold(&rt->u.dst);
		read_unlock_bh(&rt6_lock);

		nrt = rt6_cow(rt, &skb->nh.ipv6h->daddr,
			      &skb->nh.ipv6h->saddr,
			      &NETLINK_CB(skb));

		dst_release(&rt->u.dst);
		rt = nrt;

		if (rt->u.dst.error != -EEXIST || --attempts <= 0)
			goto out2;

		/* Race condition! In the gap, when rt6_lock was
		   released someone could insert this route.  Relookup.
		*/
		dst_release(&rt->u.dst);
		goto relookup;
	}
	dst_hold(&rt->u.dst);

out:
	read_unlock_bh(&rt6_lock);
out2:
	rt->u.dst.lastuse = jiffies;
	rt->u.dst.__use++;
	skb->dst = (struct dst_entry *) rt;
}
519 
/* Resolve an outgoing route for flow @fl.  Default routes at addrconf
 * priority or below go through RFC 2461 best-router selection;
 * otherwise the logic mirrors ip6_route_input, including the
 * clone-and-retry dance for connected routes with no neighbour.
 * Returns a held dst_entry — possibly the reject route, whose
 * dst.error the caller must check.
 */
struct dst_entry * ip6_route_output(struct sock *sk, struct flowi *fl)
{
	struct fib6_node *fn;
	struct rt6_info *rt;
	int strict;
	int attempts = 3;

	/* Multicast and link-local destinations must respect fl->oif. */
	strict = ipv6_addr_type(&fl->fl6_dst) & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL);

relookup:
	read_lock_bh(&rt6_lock);

	fn = fib6_lookup(&ip6_routing_table, &fl->fl6_dst, &fl->fl6_src);

restart:
	rt = fn->leaf;

	if ((rt->rt6i_flags & RTF_CACHE)) {
		rt = rt6_device_match(rt, fl->oif, strict);
		BACKTRACK();
		dst_hold(&rt->u.dst);
		goto out;
	}
	if (rt->rt6i_flags & RTF_DEFAULT) {
		/* Only auto-learned defaults are round-robined;
		 * user-configured (higher-priority) ones are kept.
		 */
		if (rt->rt6i_metric >= IP6_RT_PRIO_ADDRCONF)
			rt = rt6_best_dflt(rt, fl->oif);
	} else {
		rt = rt6_device_match(rt, fl->oif, strict);
		BACKTRACK();
	}

	if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) {
		struct rt6_info *nrt;
		dst_hold(&rt->u.dst);
		read_unlock_bh(&rt6_lock);

		nrt = rt6_cow(rt, &fl->fl6_dst, &fl->fl6_src, NULL);

		dst_release(&rt->u.dst);
		rt = nrt;

		if (rt->u.dst.error != -EEXIST || --attempts <= 0)
			goto out2;

		/* Race condition! In the gap, when rt6_lock was
		   released someone could insert this route.  Relookup.
		*/
		dst_release(&rt->u.dst);
		goto relookup;
	}
	dst_hold(&rt->u.dst);

out:
	read_unlock_bh(&rt6_lock);
out2:
	rt->u.dst.lastuse = jiffies;
	rt->u.dst.__use++;
	return &rt->u.dst;
}
579 
580 
581 /*
582  *	Destination cache support functions
583  */
584 
585 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
586 {
587 	struct rt6_info *rt;
588 
589 	rt = (struct rt6_info *) dst;
590 
591 	if (rt && rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
592 		return dst;
593 
594 	return NULL;
595 }
596 
597 static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
598 {
599 	struct rt6_info *rt = (struct rt6_info *) dst;
600 
601 	if (rt) {
602 		if (rt->rt6i_flags & RTF_CACHE)
603 			ip6_del_rt(rt, NULL, NULL, NULL);
604 		else
605 			dst_release(dst);
606 	}
607 	return NULL;
608 }
609 
/* dst_ops.link_failure hook: neighbour resolution failed for skb's
 * route.  Report address-unreachable back to the sender, then expire a
 * cached clone immediately, or — for a default route — poison the fib
 * node's serial number so ip6_dst_check() forces a fresh lookup.
 */
static void ip6_link_failure(struct sk_buff *skb)
{
	struct rt6_info *rt;

	icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0, skb->dev);

	rt = (struct rt6_info *) skb->dst;
	if (rt) {
		if (rt->rt6i_flags&RTF_CACHE) {
			dst_set_expires(&rt->u.dst, 0);
			rt->rt6i_flags |= RTF_EXPIRES;
		} else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
			rt->rt6i_node->fn_sernum = -1;
	}
}
625 
626 static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
627 {
628 	struct rt6_info *rt6 = (struct rt6_info*)dst;
629 
630 	if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
631 		rt6->rt6i_flags |= RTF_MODIFIED;
632 		if (mtu < IPV6_MIN_MTU) {
633 			mtu = IPV6_MIN_MTU;
634 			dst->metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
635 		}
636 		dst->metrics[RTAX_MTU-1] = mtu;
637 	}
638 }
639 
640 /* Protected by rt6_lock.  */
641 static struct dst_entry *ndisc_dst_gc_list;
642 static int ipv6_get_mtu(struct net_device *dev);
643 
644 static inline unsigned int ipv6_advmss(unsigned int mtu)
645 {
646 	mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
647 
648 	if (mtu < ip6_rt_min_advmss)
649 		mtu = ip6_rt_min_advmss;
650 
651 	/*
652 	 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
653 	 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
654 	 * IPV6_MAXPLEN is also valid and means: "any MSS,
655 	 * rely only on pmtu discovery"
656 	 */
657 	if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
658 		mtu = IPV6_MAXPLEN;
659 	return mtu;
660 }
661 
/* Allocate a dst entry for neighbour-discovery generated packets; it
 * is never inserted into the FIB.  References are taken on @dev, its
 * inet6_dev, and @neigh (or one is looked up for @addr).  The entry is
 * chained on ndisc_dst_gc_list under rt6_lock (writer) and reaped by
 * ndisc_dst_gc() once its refcount drops.  Returns NULL if the device
 * has no inet6 state or allocation fails.
 */
struct dst_entry *ndisc_dst_alloc(struct net_device *dev,
				  struct neighbour *neigh,
				  struct in6_addr *addr,
				  int (*output)(struct sk_buff *))
{
	struct rt6_info *rt;
	struct inet6_dev *idev = in6_dev_get(dev);

	if (unlikely(idev == NULL))
		return NULL;

	rt = ip6_dst_alloc();
	if (unlikely(rt == NULL)) {
		in6_dev_put(idev);
		goto out;	/* rt is NULL, so NULL is returned */
	}

	dev_hold(dev);
	if (neigh)
		neigh_hold(neigh);
	else
		neigh = ndisc_get_neigh(dev, addr);

	rt->rt6i_dev	  = dev;
	rt->rt6i_idev     = idev;
	rt->rt6i_nexthop  = neigh;
	atomic_set(&rt->u.dst.__refcnt, 1);
	rt->u.dst.metrics[RTAX_HOPLIMIT-1] = 255;
	rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
	rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
	rt->u.dst.output  = output;

#if 0	/* there's no chance to use these for ndisc */
	rt->u.dst.flags   = ipv6_addr_type(addr) & IPV6_ADDR_UNICAST
				? DST_HOST
				: 0;
	ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
	rt->rt6i_dst.plen = 128;
#endif

	write_lock_bh(&rt6_lock);
	rt->u.dst.next = ndisc_dst_gc_list;
	ndisc_dst_gc_list = &rt->u.dst;
	write_unlock_bh(&rt6_lock);

	fib6_force_start_gc();

out:
	return (struct dst_entry *)rt;
}
712 
/* Reap unreferenced entries from ndisc_dst_gc_list, which is protected
 * by rt6_lock (see the declaration above) — presumably the caller
 * holds it as writer; confirm at the call site.  Survivors are counted
 * into *more; returns how many entries were freed.
 */
int ndisc_dst_gc(int *more)
{
	struct dst_entry *dst, *next, **pprev;
	int freed;

	next = NULL;
	pprev = &ndisc_dst_gc_list;
	freed = 0;
	while ((dst = *pprev) != NULL) {
		if (!atomic_read(&dst->__refcnt)) {
			/* Unlink and free; pprev keeps pointing at the
			 * link we just rewrote.
			 */
			*pprev = dst->next;
			dst_free(dst);
			freed++;
		} else {
			pprev = &dst->next;
			(*more)++;
		}
	}

	return freed;
}
734 
/* dst_ops.gc callback.  Skips the fib GC run entirely when the last
 * one was less than ip6_rt_gc_min_interval ago, unless the table has
 * exceeded ip6_rt_max_size.  The static 'expire' age threshold decays
 * by 1/2^ip6_rt_gc_elasticity per call while pressure persists and is
 * reset to half the GC timeout once the table drops below gc_thresh.
 * Returns nonzero while still over the hard size limit.
 * NOTE(review): static state makes this non-reentrant — presumably
 * serialized by the dst core; confirm.
 */
static int ip6_dst_gc(void)
{
	static unsigned expire = 30*HZ;
	static unsigned long last_gc;
	unsigned long now = jiffies;

	if (time_after(last_gc + ip6_rt_gc_min_interval, now) &&
	    atomic_read(&ip6_dst_ops.entries) <= ip6_rt_max_size)
		goto out;

	expire++;
	fib6_run_gc(expire);
	last_gc = now;
	if (atomic_read(&ip6_dst_ops.entries) < ip6_dst_ops.gc_thresh)
		expire = ip6_rt_gc_timeout>>1;

out:
	expire -= expire>>ip6_rt_gc_elasticity;
	return (atomic_read(&ip6_dst_ops.entries) > ip6_rt_max_size);
}
755 
756 /* Clean host part of a prefix. Not necessary in radix tree,
757    but results in cleaner routing tables.
758 
759    Remove it only when all the things will work!
760  */
761 
762 static int ipv6_get_mtu(struct net_device *dev)
763 {
764 	int mtu = IPV6_MIN_MTU;
765 	struct inet6_dev *idev;
766 
767 	idev = in6_dev_get(dev);
768 	if (idev) {
769 		mtu = idev->cnf.mtu6;
770 		in6_dev_put(idev);
771 	}
772 	return mtu;
773 }
774 
775 int ipv6_get_hoplimit(struct net_device *dev)
776 {
777 	int hoplimit = ipv6_devconf.hop_limit;
778 	struct inet6_dev *idev;
779 
780 	idev = in6_dev_get(dev);
781 	if (idev) {
782 		hoplimit = idev->cnf.hop_limit;
783 		in6_dev_put(idev);
784 	}
785 	return hoplimit;
786 }
787 
788 /*
789  *
790  */
791 
/* Build and insert a route described by @rtmsg (ioctl or rtnetlink
 * request); @nlh/@_rtattr/@req carry the optional netlink context.  On
 * success ownership of the new rt6_info (plus the device and inet6_dev
 * references) passes to the FIB via ip6_ins_rt().  On failure every
 * acquired resource is released before returning a negative errno.
 */
int ip6_route_add(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh,
		void *_rtattr, struct netlink_skb_parms *req)
{
	int err;
	struct rtmsg *r;
	struct rtattr **rta;
	struct rt6_info *rt = NULL;
	struct net_device *dev = NULL;
	struct inet6_dev *idev = NULL;
	int addr_type;

	rta = (struct rtattr **) _rtattr;

	/* Prefix lengths beyond 128 bits are nonsense. */
	if (rtmsg->rtmsg_dst_len > 128 || rtmsg->rtmsg_src_len > 128)
		return -EINVAL;
#ifndef CONFIG_IPV6_SUBTREES
	/* Source-prefix routing requires subtree support. */
	if (rtmsg->rtmsg_src_len)
		return -EINVAL;
#endif
	if (rtmsg->rtmsg_ifindex) {
		err = -ENODEV;
		dev = dev_get_by_index(rtmsg->rtmsg_ifindex);
		if (!dev)
			goto out;
		idev = in6_dev_get(dev);
		if (!idev)
			goto out;
	}

	if (rtmsg->rtmsg_metric == 0)
		rtmsg->rtmsg_metric = IP6_RT_PRIO_USER;

	rt = ip6_dst_alloc();

	if (rt == NULL) {
		err = -ENOMEM;
		goto out;
	}

	rt->u.dst.obsolete = -1;
	rt->rt6i_expires = clock_t_to_jiffies(rtmsg->rtmsg_info);
	if (nlh && (r = NLMSG_DATA(nlh))) {
		rt->rt6i_protocol = r->rtm_protocol;
	} else {
		rt->rt6i_protocol = RTPROT_BOOT;
	}

	addr_type = ipv6_addr_type(&rtmsg->rtmsg_dst);

	if (addr_type & IPV6_ADDR_MULTICAST)
		rt->u.dst.input = ip6_mc_input;
	else
		rt->u.dst.input = ip6_forward;

	rt->u.dst.output = ip6_output;

	ipv6_addr_prefix(&rt->rt6i_dst.addr,
			 &rtmsg->rtmsg_dst, rtmsg->rtmsg_dst_len);
	rt->rt6i_dst.plen = rtmsg->rtmsg_dst_len;
	if (rt->rt6i_dst.plen == 128)
	       rt->u.dst.flags = DST_HOST;

#ifdef CONFIG_IPV6_SUBTREES
	ipv6_addr_prefix(&rt->rt6i_src.addr,
			 &rtmsg->rtmsg_src, rtmsg->rtmsg_src_len);
	rt->rt6i_src.plen = rtmsg->rtmsg_src_len;
#endif

	rt->rt6i_metric = rtmsg->rtmsg_metric;

	/* We cannot add true routes via loopback here,
	   they would result in kernel looping; promote them to reject routes
	 */
	if ((rtmsg->rtmsg_flags&RTF_REJECT) ||
	    (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK))) {
		/* hold loopback dev/idev if we haven't done so. */
		if (dev != &loopback_dev) {
			if (dev) {
				dev_put(dev);
				in6_dev_put(idev);
			}
			dev = &loopback_dev;
			dev_hold(dev);
			idev = in6_dev_get(dev);
			if (!idev) {
				err = -ENODEV;
				goto out;
			}
		}
		rt->u.dst.output = ip6_pkt_discard_out;
		rt->u.dst.input = ip6_pkt_discard;
		rt->u.dst.error = -ENETUNREACH;
		rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
		goto install_route;
	}

	if (rtmsg->rtmsg_flags & RTF_GATEWAY) {
		struct in6_addr *gw_addr;
		int gwa_type;

		gw_addr = &rtmsg->rtmsg_gateway;
		ipv6_addr_copy(&rt->rt6i_gateway, &rtmsg->rtmsg_gateway);
		gwa_type = ipv6_addr_type(gw_addr);

		if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
			struct rt6_info *grt;

			/* IPv6 strictly inhibits using not link-local
			   addresses as nexthop address.
			   Otherwise, router will not able to send redirects.
			   It is very good, but in some (rare!) circumstances
			   (SIT, PtP, NBMA NOARP links) it is handy to allow
			   some exceptions. --ANK
			 */
			err = -EINVAL;
			if (!(gwa_type&IPV6_ADDR_UNICAST))
				goto out;

			/* The gateway itself must be reachable via a
			 * non-gatewayed route on the same device.
			 */
			grt = rt6_lookup(gw_addr, NULL, rtmsg->rtmsg_ifindex, 1);

			err = -EHOSTUNREACH;
			if (grt == NULL)
				goto out;
			if (dev) {
				if (dev != grt->rt6i_dev) {
					dst_release(&grt->u.dst);
					goto out;
				}
			} else {
				/* No device given: inherit the gateway
				 * route's device and idev (with refs).
				 */
				dev = grt->rt6i_dev;
				idev = grt->rt6i_idev;
				dev_hold(dev);
				in6_dev_hold(grt->rt6i_idev);
			}
			if (!(grt->rt6i_flags&RTF_GATEWAY))
				err = 0;
			dst_release(&grt->u.dst);

			if (err)
				goto out;
		}
		err = -EINVAL;
		if (dev == NULL || (dev->flags&IFF_LOOPBACK))
			goto out;
	}

	err = -ENODEV;
	if (dev == NULL)
		goto out;

	if (rtmsg->rtmsg_flags & (RTF_GATEWAY|RTF_NONEXTHOP)) {
		rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
		if (IS_ERR(rt->rt6i_nexthop)) {
			err = PTR_ERR(rt->rt6i_nexthop);
			rt->rt6i_nexthop = NULL;
			goto out;
		}
	}

	rt->rt6i_flags = rtmsg->rtmsg_flags;

install_route:
	/* Apply any caller-supplied RTA_METRICS attributes. */
	if (rta && rta[RTA_METRICS-1]) {
		int attrlen = RTA_PAYLOAD(rta[RTA_METRICS-1]);
		struct rtattr *attr = RTA_DATA(rta[RTA_METRICS-1]);

		while (RTA_OK(attr, attrlen)) {
			unsigned flavor = attr->rta_type;
			if (flavor) {
				if (flavor > RTAX_MAX) {
					err = -EINVAL;
					goto out;
				}
				rt->u.dst.metrics[flavor-1] =
					*(u32 *)RTA_DATA(attr);
			}
			attr = RTA_NEXT(attr, attrlen);
		}
	}

	/* Fill in defaults for any metric the caller left unset. */
	if (rt->u.dst.metrics[RTAX_HOPLIMIT-1] == 0)
		rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
	if (!rt->u.dst.metrics[RTAX_MTU-1])
		rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(dev);
	if (!rt->u.dst.metrics[RTAX_ADVMSS-1])
		rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
	rt->u.dst.dev = dev;
	rt->rt6i_idev = idev;
	return ip6_ins_rt(rt, nlh, _rtattr, req);

out:
	/* Error path: release everything acquired above. */
	if (dev)
		dev_put(dev);
	if (idev)
		in6_dev_put(idev);
	if (rt)
		dst_free((struct dst_entry *) rt);
	return err;
}
991 
/* Remove @rt from the FIB under the writer side of rt6_lock, clearing
 * the cached default-router pointer first so it cannot dangle, and
 * drop the reference the caller passed in with the route (see
 * ip6_route_del / ip6_negative_advice).
 */
int ip6_del_rt(struct rt6_info *rt, struct nlmsghdr *nlh, void *_rtattr, struct netlink_skb_parms *req)
{
	int err;

	write_lock_bh(&rt6_lock);

	rt6_reset_dflt_pointer(NULL);

	err = fib6_del(rt, nlh, _rtattr, req);
	dst_release(&rt->u.dst);

	write_unlock_bh(&rt6_lock);

	return err;
}
1007 
/* ioctl/rtnetlink route deletion: locate the fib node for the given
 * dst/src prefixes, then scan its leaf chain for a route matching the
 * optional ifindex, gateway and metric filters.  The first match is
 * held and handed to ip6_del_rt() (which consumes the reference).
 * Returns -ESRCH when nothing matches.
 */
static int ip6_route_del(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh, void *_rtattr, struct netlink_skb_parms *req)
{
	struct fib6_node *fn;
	struct rt6_info *rt;
	int err = -ESRCH;

	read_lock_bh(&rt6_lock);

	fn = fib6_locate(&ip6_routing_table,
			 &rtmsg->rtmsg_dst, rtmsg->rtmsg_dst_len,
			 &rtmsg->rtmsg_src, rtmsg->rtmsg_src_len);

	if (fn) {
		for (rt = fn->leaf; rt; rt = rt->u.next) {
			if (rtmsg->rtmsg_ifindex &&
			    (rt->rt6i_dev == NULL ||
			     rt->rt6i_dev->ifindex != rtmsg->rtmsg_ifindex))
				continue;
			if (rtmsg->rtmsg_flags&RTF_GATEWAY &&
			    !ipv6_addr_equal(&rtmsg->rtmsg_gateway, &rt->rt6i_gateway))
				continue;
			if (rtmsg->rtmsg_metric &&
			    rtmsg->rtmsg_metric != rt->rt6i_metric)
				continue;
			/* Hold the route across the lock drop;
			 * ip6_del_rt takes over the reference.
			 */
			dst_hold(&rt->u.dst);
			read_unlock_bh(&rt6_lock);

			return ip6_del_rt(rt, nlh, _rtattr, req);
		}
	}
	read_unlock_bh(&rt6_lock);

	return err;
}
1042 
1043 /*
1044  *	Handle redirects
1045  */
/* Process an NDISC redirect: @dest is the destination being
 * redirected, @saddr the router that sent the redirect, @neigh the new
 * first hop, @lladdr its (optional) link-layer address, and @on_link
 * whether the target is directly reachable.  If the redirect is
 * acceptable, the neighbour cache is updated and a host cache route
 * through the new first hop is installed.
 */
void rt6_redirect(struct in6_addr *dest, struct in6_addr *saddr,
		  struct neighbour *neigh, u8 *lladdr, int on_link)
{
	struct rt6_info *rt, *nrt;

	/* Locate old route to this destination. */
	rt = rt6_lookup(dest, NULL, neigh->dev->ifindex, 1);

	if (rt == NULL)
		return;

	if (neigh->dev != rt->rt6i_dev)
		goto out;

	/*
	 * Current route is on-link; redirect is always invalid.
	 *
	 * Seems, previous statement is not true. It could
	 * be node, which looks for us as on-link (f.e. proxy ndisc)
	 * But then router serving it might decide, that we should
	 * know truth 8)8) --ANK (980726).
	 */
	if (!(rt->rt6i_flags&RTF_GATEWAY))
		goto out;

	/*
	 *	RFC 2461 specifies that redirects should only be
	 *	accepted if they come from the nexthop to the target.
	 *	Due to the way default routers are chosen, this notion
	 *	is a bit fuzzy and one might need to check all default
	 *	routers.
	 */
	if (!ipv6_addr_equal(saddr, &rt->rt6i_gateway)) {
		if (rt->rt6i_flags & RTF_DEFAULT) {
			struct rt6_info *rt1;

			/* Search the default-route chain for one whose
			 * gateway is the sender; swap the held route.
			 */
			read_lock(&rt6_lock);
			for (rt1 = ip6_routing_table.leaf; rt1; rt1 = rt1->u.next) {
				if (ipv6_addr_equal(saddr, &rt1->rt6i_gateway)) {
					dst_hold(&rt1->u.dst);
					dst_release(&rt->u.dst);
					read_unlock(&rt6_lock);
					rt = rt1;
					goto source_ok;
				}
			}
			read_unlock(&rt6_lock);
		}
		if (net_ratelimit())
			printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
			       "for redirect target\n");
		goto out;
	}

source_ok:

	/*
	 *	We have finally decided to accept it.
	 */

	neigh_update(neigh, lladdr, NUD_STALE,
		     NEIGH_UPDATE_F_WEAK_OVERRIDE|
		     NEIGH_UPDATE_F_OVERRIDE|
		     (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
				     NEIGH_UPDATE_F_ISROUTER))
		     );

	/*
	 * Redirect received -> path was valid.
	 * Look, redirects are sent only in response to data packets,
	 * so that this nexthop apparently is reachable. --ANK
	 */
	dst_confirm(&rt->u.dst);

	/* Duplicate redirect: silently ignore. */
	if (neigh == rt->u.dst.neighbour)
		goto out;

	nrt = ip6_rt_copy(rt);
	if (nrt == NULL)
		goto out;

	nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
	if (on_link)
		nrt->rt6i_flags &= ~RTF_GATEWAY;

	ipv6_addr_copy(&nrt->rt6i_dst.addr, dest);
	nrt->rt6i_dst.plen = 128;
	nrt->u.dst.flags |= DST_HOST;

	ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
	nrt->rt6i_nexthop = neigh_clone(neigh);
	/* Reset pmtu, it may be better */
	nrt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(neigh->dev);
	nrt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&nrt->u.dst));

	if (ip6_ins_rt(nrt, NULL, NULL, NULL))
		goto out;

	/* The old cached clone is superseded; ip6_del_rt drops our
	 * reference, so return instead of falling through to out.
	 */
	if (rt->rt6i_flags&RTF_CACHE) {
		ip6_del_rt(rt, NULL, NULL, NULL);
		return;
	}

out:
        dst_release(&rt->u.dst);
	return;
}
1154 
1155 /*
1156  *	Handle ICMP "packet too big" messages
1157  *	i.e. Path MTU discovery
1158  */
1159 
void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
			struct net_device *dev, u32 pmtu)
{
	struct rt6_info *rt, *nrt;
	int allfrag = 0;

	/* No route towards the destination the Too Big message refers to:
	 * nothing to update.  rt6_lookup() returns a held reference.
	 */
	rt = rt6_lookup(daddr, saddr, dev->ifindex, 0);
	if (rt == NULL)
		return;

	/* Only a decrease of the path MTU is of interest. */
	if (pmtu >= dst_mtu(&rt->u.dst))
		goto out;

	if (pmtu < IPV6_MIN_MTU) {
		/*
		 * According to RFC2460, PMTU is set to the IPv6 Minimum Link
		 * MTU (1280) and a fragment header should always be included
		 * after a node receiving Too Big message reporting PMTU is
		 * less than the IPv6 Minimum Link MTU.
		 */
		pmtu = IPV6_MIN_MTU;
		allfrag = 1;
	}

	/* New mtu received -> path was valid.
	   They are sent only in response to data packets,
	   so that this nexthop apparently is reachable. --ANK
	 */
	dst_confirm(&rt->u.dst);

	/* Host route. If it is static, it would be better
	   not to override it, but add new one, so that
	   when cache entry will expire old pmtu
	   would return automatically.
	 */
	if (rt->rt6i_flags & RTF_CACHE) {
		rt->u.dst.metrics[RTAX_MTU-1] = pmtu;
		if (allfrag)
			rt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
		dst_set_expires(&rt->u.dst, ip6_rt_mtu_expires);
		rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
		goto out;
	}

	/* Network route.
	   Two cases are possible:
	   1. It is connected route. Action: COW
	   2. It is gatewayed route or NONEXTHOP route. Action: clone it.
	 */
	if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) {
		/* NOTE(review): nrt is dereferenced without a NULL check --
		 * presumably rt6_cow() never returns NULL and reports
		 * failure via nrt->u.dst.error; confirm.
		 */
		nrt = rt6_cow(rt, daddr, saddr, NULL);
		if (!nrt->u.dst.error) {
			nrt->u.dst.metrics[RTAX_MTU-1] = pmtu;
			if (allfrag)
				nrt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
			/* According to RFC 1981, detecting PMTU increase shouldn't be
			   happened within 5 mins, the recommended timer is 10 mins.
			   Here this route expiration time is set to ip6_rt_mtu_expires
			   which is 10 mins. After 10 mins the decreased pmtu is expired
			   and detecting PMTU increase will be automatically happened.
			 */
			dst_set_expires(&nrt->u.dst, ip6_rt_mtu_expires);
			nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
		}
		dst_release(&nrt->u.dst);
	} else {
		/* Clone the route into a /128 host cache entry that
		 * carries the learned PMTU and expires with it.
		 */
		nrt = ip6_rt_copy(rt);
		if (nrt == NULL)
			goto out;
		ipv6_addr_copy(&nrt->rt6i_dst.addr, daddr);
		nrt->rt6i_dst.plen = 128;
		nrt->u.dst.flags |= DST_HOST;
		nrt->rt6i_nexthop = neigh_clone(rt->rt6i_nexthop);
		dst_set_expires(&nrt->u.dst, ip6_rt_mtu_expires);
		nrt->rt6i_flags |= RTF_DYNAMIC|RTF_CACHE|RTF_EXPIRES;
		nrt->u.dst.metrics[RTAX_MTU-1] = pmtu;
		if (allfrag)
			nrt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
		ip6_ins_rt(nrt, NULL, NULL, NULL);
	}

out:
	dst_release(&rt->u.dst);
}
1244 
1245 /*
1246  *	Misc support functions
1247  */
1248 
1249 static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
1250 {
1251 	struct rt6_info *rt = ip6_dst_alloc();
1252 
1253 	if (rt) {
1254 		rt->u.dst.input = ort->u.dst.input;
1255 		rt->u.dst.output = ort->u.dst.output;
1256 
1257 		memcpy(rt->u.dst.metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
1258 		rt->u.dst.dev = ort->u.dst.dev;
1259 		if (rt->u.dst.dev)
1260 			dev_hold(rt->u.dst.dev);
1261 		rt->rt6i_idev = ort->rt6i_idev;
1262 		if (rt->rt6i_idev)
1263 			in6_dev_hold(rt->rt6i_idev);
1264 		rt->u.dst.lastuse = jiffies;
1265 		rt->rt6i_expires = 0;
1266 
1267 		ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
1268 		rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
1269 		rt->rt6i_metric = 0;
1270 
1271 		memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1272 #ifdef CONFIG_IPV6_SUBTREES
1273 		memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1274 #endif
1275 	}
1276 	return rt;
1277 }
1278 
1279 struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *dev)
1280 {
1281 	struct rt6_info *rt;
1282 	struct fib6_node *fn;
1283 
1284 	fn = &ip6_routing_table;
1285 
1286 	write_lock_bh(&rt6_lock);
1287 	for (rt = fn->leaf; rt; rt=rt->u.next) {
1288 		if (dev == rt->rt6i_dev &&
1289 		    ipv6_addr_equal(&rt->rt6i_gateway, addr))
1290 			break;
1291 	}
1292 	if (rt)
1293 		dst_hold(&rt->u.dst);
1294 	write_unlock_bh(&rt6_lock);
1295 	return rt;
1296 }
1297 
1298 struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr,
1299 				     struct net_device *dev)
1300 {
1301 	struct in6_rtmsg rtmsg;
1302 
1303 	memset(&rtmsg, 0, sizeof(struct in6_rtmsg));
1304 	rtmsg.rtmsg_type = RTMSG_NEWROUTE;
1305 	ipv6_addr_copy(&rtmsg.rtmsg_gateway, gwaddr);
1306 	rtmsg.rtmsg_metric = 1024;
1307 	rtmsg.rtmsg_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT | RTF_UP | RTF_EXPIRES;
1308 
1309 	rtmsg.rtmsg_ifindex = dev->ifindex;
1310 
1311 	ip6_route_add(&rtmsg, NULL, NULL, NULL);
1312 	return rt6_get_dflt_router(gwaddr, dev);
1313 }
1314 
/*
 *	Delete every default (RTF_DEFAULT) or addrconf-learned (RTF_ADDRCONF)
 *	route from the tree root.  The tree lock cannot be held across
 *	ip6_del_rt(), so each deletion drops the lock and restarts the scan;
 *	the dst_hold() keeps the entry alive across the unlocked window.
 */
void rt6_purge_dflt_routers(void)
{
	struct rt6_info *rt;

restart:
	read_lock_bh(&rt6_lock);
	for (rt = ip6_routing_table.leaf; rt; rt = rt->u.next) {
		if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
			dst_hold(&rt->u.dst);

			/* Forget any cached round-robin default-router choice. */
			rt6_reset_dflt_pointer(NULL);

			read_unlock_bh(&rt6_lock);

			ip6_del_rt(rt, NULL, NULL, NULL);

			goto restart;
		}
	}
	read_unlock_bh(&rt6_lock);
}
1336 
1337 int ipv6_route_ioctl(unsigned int cmd, void __user *arg)
1338 {
1339 	struct in6_rtmsg rtmsg;
1340 	int err;
1341 
1342 	switch(cmd) {
1343 	case SIOCADDRT:		/* Add a route */
1344 	case SIOCDELRT:		/* Delete a route */
1345 		if (!capable(CAP_NET_ADMIN))
1346 			return -EPERM;
1347 		err = copy_from_user(&rtmsg, arg,
1348 				     sizeof(struct in6_rtmsg));
1349 		if (err)
1350 			return -EFAULT;
1351 
1352 		rtnl_lock();
1353 		switch (cmd) {
1354 		case SIOCADDRT:
1355 			err = ip6_route_add(&rtmsg, NULL, NULL, NULL);
1356 			break;
1357 		case SIOCDELRT:
1358 			err = ip6_route_del(&rtmsg, NULL, NULL, NULL);
1359 			break;
1360 		default:
1361 			err = -EINVAL;
1362 		}
1363 		rtnl_unlock();
1364 
1365 		return err;
1366 	};
1367 
1368 	return -EINVAL;
1369 }
1370 
1371 /*
1372  *	Drop the packet on the floor
1373  */
1374 
/*
 *	Packet handler for the null route: bump the "no route" counter,
 *	report destination-unreachable (no route) back to the sender and
 *	drop the packet.
 */
static int ip6_pkt_discard(struct sk_buff *skb)
{
	IP6_INC_STATS(IPSTATS_MIB_OUTNOROUTES);
	icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_NOROUTE, 0, skb->dev);
	kfree_skb(skb);
	return 0;
}
1382 
/* Output-path variant: point skb->dev at the route's device first, since
 * ip6_pkt_discard() passes skb->dev to icmpv6_send(). */
static int ip6_pkt_discard_out(struct sk_buff *skb)
{
	skb->dev = skb->dst->dev;
	return ip6_pkt_discard(skb);
}
1388 
1389 /*
1390  *	Allocate a dst for local (unicast / anycast) address.
1391  */
1392 
struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
				    const struct in6_addr *addr,
				    int anycast)
{
	struct rt6_info *rt = ip6_dst_alloc();

	if (rt == NULL)
		return ERR_PTR(-ENOMEM);

	/* The route is bound to the loopback device; take the references
	 * the rt6_info will own.
	 */
	dev_hold(&loopback_dev);
	in6_dev_hold(idev);

	rt->u.dst.flags = DST_HOST;
	rt->u.dst.input = ip6_input;
	rt->u.dst.output = ip6_output;
	rt->rt6i_dev = &loopback_dev;
	rt->rt6i_idev = idev;
	rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
	rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
	/* -1 presumably means "hop limit not fixed by this route" -- confirm. */
	rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
	rt->u.dst.obsolete = -1;

	/* Anycast addresses do not get RTF_LOCAL; unicast ones do. */
	rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
	if (!anycast)
		rt->rt6i_flags |= RTF_LOCAL;
	rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
	if (rt->rt6i_nexthop == NULL) {
		/* NOTE(review): relies on the dst destructor dropping the
		 * device/idev references taken above -- confirm.
		 */
		dst_free((struct dst_entry *) rt);
		return ERR_PTR(-ENOMEM);
	}

	ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
	rt->rt6i_dst.plen = 128;

	/* The caller receives the initial reference. */
	atomic_set(&rt->u.dst.__refcnt, 1);

	return rt;
}
1431 
1432 static int fib6_ifdown(struct rt6_info *rt, void *arg)
1433 {
1434 	if (((void*)rt->rt6i_dev == arg || arg == NULL) &&
1435 	    rt != &ip6_null_entry) {
1436 		RT6_TRACE("deleted by ifdown %p\n", rt);
1437 		return -1;
1438 	}
1439 	return 0;
1440 }
1441 
/*
 *	Walk the routing tree and clean out routes on @dev (all devices when
 *	@dev is NULL) via the fib6_ifdown() callback.  NOTE(review): the
 *	callback's -1 return is presumably fib6_clean_tree()'s "delete this
 *	route" signal -- confirm against its definition.
 */
void rt6_ifdown(struct net_device *dev)
{
	write_lock_bh(&rt6_lock);
	fib6_clean_tree(&ip6_routing_table, fib6_ifdown, 0, dev);
	write_unlock_bh(&rt6_lock);
}
1448 
/* Argument block threaded through rt6_mtu_change_route() by
 * rt6_mtu_change(). */
struct rt6_mtu_change_arg
{
	struct net_device *dev;	/* device whose MTU changed */
	unsigned mtu;		/* its new MTU */
};
1454 
/*
 *	fib6_clean_tree() callback for rt6_mtu_change(): update the PMTU
 *	(and advertised MSS) of routes over the device whose MTU changed.
 */
static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
{
	struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
	struct inet6_dev *idev;

	/* In IPv6 pmtu discovery is not optional,
	   so that RTAX_MTU lock cannot disable it.
	   We still use this lock to block changes
	   caused by addrconf/ndisc.
	*/

	idev = __in6_dev_get(arg->dev);
	if (idev == NULL)
		return 0;

	/* For administrative MTU increase, there is no way to discover
	   IPv6 PMTU increase, so PMTU increase should be updated here.
	   Since RFC 1981 doesn't include administrative MTU increase
	   update PMTU increase is a MUST. (i.e. jumbo frame)
	 */
	/*
	   If the new MTU is less than the route PMTU, the new MTU will be
	   the lowest MTU in the path; update the route PMTU to reflect the
	   decrease.  If the new MTU is greater than the route PMTU, and the
	   old MTU is the lowest MTU in the path, update the route PMTU to
	   reflect the increase.  In this case, if the other nodes' MTU is
	   also the lowest MTU in the path, a Too Big message will lead to
	   PMTU discovery.
	 */
	if (rt->rt6i_dev == arg->dev &&
	    !dst_metric_locked(&rt->u.dst, RTAX_MTU) &&
            (dst_mtu(&rt->u.dst) > arg->mtu ||
             (dst_mtu(&rt->u.dst) < arg->mtu &&
	      dst_mtu(&rt->u.dst) == idev->cnf.mtu6)))
		rt->u.dst.metrics[RTAX_MTU-1] = arg->mtu;
	rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(arg->mtu);
	return 0;
}
1493 
1494 void rt6_mtu_change(struct net_device *dev, unsigned mtu)
1495 {
1496 	struct rt6_mtu_change_arg arg;
1497 
1498 	arg.dev = dev;
1499 	arg.mtu = mtu;
1500 	read_lock_bh(&rt6_lock);
1501 	fib6_clean_tree(&ip6_routing_table, rt6_mtu_change_route, 0, &arg);
1502 	read_unlock_bh(&rt6_lock);
1503 }
1504 
1505 static int inet6_rtm_to_rtmsg(struct rtmsg *r, struct rtattr **rta,
1506 			      struct in6_rtmsg *rtmsg)
1507 {
1508 	memset(rtmsg, 0, sizeof(*rtmsg));
1509 
1510 	rtmsg->rtmsg_dst_len = r->rtm_dst_len;
1511 	rtmsg->rtmsg_src_len = r->rtm_src_len;
1512 	rtmsg->rtmsg_flags = RTF_UP;
1513 	if (r->rtm_type == RTN_UNREACHABLE)
1514 		rtmsg->rtmsg_flags |= RTF_REJECT;
1515 
1516 	if (rta[RTA_GATEWAY-1]) {
1517 		if (rta[RTA_GATEWAY-1]->rta_len != RTA_LENGTH(16))
1518 			return -EINVAL;
1519 		memcpy(&rtmsg->rtmsg_gateway, RTA_DATA(rta[RTA_GATEWAY-1]), 16);
1520 		rtmsg->rtmsg_flags |= RTF_GATEWAY;
1521 	}
1522 	if (rta[RTA_DST-1]) {
1523 		if (RTA_PAYLOAD(rta[RTA_DST-1]) < ((r->rtm_dst_len+7)>>3))
1524 			return -EINVAL;
1525 		memcpy(&rtmsg->rtmsg_dst, RTA_DATA(rta[RTA_DST-1]), ((r->rtm_dst_len+7)>>3));
1526 	}
1527 	if (rta[RTA_SRC-1]) {
1528 		if (RTA_PAYLOAD(rta[RTA_SRC-1]) < ((r->rtm_src_len+7)>>3))
1529 			return -EINVAL;
1530 		memcpy(&rtmsg->rtmsg_src, RTA_DATA(rta[RTA_SRC-1]), ((r->rtm_src_len+7)>>3));
1531 	}
1532 	if (rta[RTA_OIF-1]) {
1533 		if (rta[RTA_OIF-1]->rta_len != RTA_LENGTH(sizeof(int)))
1534 			return -EINVAL;
1535 		memcpy(&rtmsg->rtmsg_ifindex, RTA_DATA(rta[RTA_OIF-1]), sizeof(int));
1536 	}
1537 	if (rta[RTA_PRIORITY-1]) {
1538 		if (rta[RTA_PRIORITY-1]->rta_len != RTA_LENGTH(4))
1539 			return -EINVAL;
1540 		memcpy(&rtmsg->rtmsg_metric, RTA_DATA(rta[RTA_PRIORITY-1]), 4);
1541 	}
1542 	return 0;
1543 }
1544 
1545 int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1546 {
1547 	struct rtmsg *r = NLMSG_DATA(nlh);
1548 	struct in6_rtmsg rtmsg;
1549 
1550 	if (inet6_rtm_to_rtmsg(r, arg, &rtmsg))
1551 		return -EINVAL;
1552 	return ip6_route_del(&rtmsg, nlh, arg, &NETLINK_CB(skb));
1553 }
1554 
1555 int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1556 {
1557 	struct rtmsg *r = NLMSG_DATA(nlh);
1558 	struct in6_rtmsg rtmsg;
1559 
1560 	if (inet6_rtm_to_rtmsg(r, arg, &rtmsg))
1561 		return -EINVAL;
1562 	return ip6_route_add(&rtmsg, nlh, arg, &NETLINK_CB(skb));
1563 }
1564 
/* Per-callback state for route dumps: the skb being filled and the netlink
 * callback that drives the dump. */
struct rt6_rtnl_dump_arg
{
	struct sk_buff *skb;
	struct netlink_callback *cb;
};
1570 
/*
 *	Fill one RTM_NEWROUTE/RTM_DELROUTE netlink message for @rt into @skb.
 *	@dst/@src, when given, override the route's own keys (used for
 *	RTM_GETROUTE replies).  Returns skb->len on success, 1 when the
 *	route was skipped by the prefix filter, -1 when the skb is full.
 *	Note: NLMSG_NEW and RTA_PUT jump to the failure labels below when
 *	the skb runs out of room.
 */
static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt,
			 struct in6_addr *dst, struct in6_addr *src,
			 int iif, int type, u32 pid, u32 seq,
			 int prefix, unsigned int flags)
{
	struct rtmsg *rtm;
	struct nlmsghdr  *nlh;
	unsigned char	 *b = skb->tail;
	struct rta_cacheinfo ci;

	if (prefix) {	/* user wants prefix routes only */
		if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
			/* success since this is not a prefix route */
			return 1;
		}
	}

	nlh = NLMSG_NEW(skb, pid, seq, type, sizeof(*rtm), flags);
	rtm = NLMSG_DATA(nlh);
	rtm->rtm_family = AF_INET6;
	rtm->rtm_dst_len = rt->rt6i_dst.plen;
	rtm->rtm_src_len = rt->rt6i_src.plen;
	rtm->rtm_tos = 0;
	rtm->rtm_table = RT_TABLE_MAIN;
	if (rt->rt6i_flags&RTF_REJECT)
		rtm->rtm_type = RTN_UNREACHABLE;
	else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
		rtm->rtm_type = RTN_LOCAL;
	else
		rtm->rtm_type = RTN_UNICAST;
	rtm->rtm_flags = 0;
	rtm->rtm_scope = RT_SCOPE_UNIVERSE;
	rtm->rtm_protocol = rt->rt6i_protocol;
	/* Map route-origin flags onto the netlink protocol value. */
	if (rt->rt6i_flags&RTF_DYNAMIC)
		rtm->rtm_protocol = RTPROT_REDIRECT;
	else if (rt->rt6i_flags & RTF_ADDRCONF)
		rtm->rtm_protocol = RTPROT_KERNEL;
	else if (rt->rt6i_flags&RTF_DEFAULT)
		rtm->rtm_protocol = RTPROT_RA;

	if (rt->rt6i_flags&RTF_CACHE)
		rtm->rtm_flags |= RTM_F_CLONED;

	if (dst) {
		RTA_PUT(skb, RTA_DST, 16, dst);
	        rtm->rtm_dst_len = 128;
	} else if (rtm->rtm_dst_len)
		RTA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
#ifdef CONFIG_IPV6_SUBTREES
	if (src) {
		RTA_PUT(skb, RTA_SRC, 16, src);
	        rtm->rtm_src_len = 128;
	} else if (rtm->rtm_src_len)
		RTA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
#endif
	if (iif)
		RTA_PUT(skb, RTA_IIF, 4, &iif);
	else if (dst) {
		/* No input interface: report the preferred source address
		 * for this destination instead, if one can be chosen.
		 */
		struct in6_addr saddr_buf;
		if (ipv6_get_saddr(&rt->u.dst, dst, &saddr_buf) == 0)
			RTA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
	}
	if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0)
		goto rtattr_failure;
	if (rt->u.dst.neighbour)
		RTA_PUT(skb, RTA_GATEWAY, 16, &rt->u.dst.neighbour->primary_key);
	if (rt->u.dst.dev)
		RTA_PUT(skb, RTA_OIF, sizeof(int), &rt->rt6i_dev->ifindex);
	RTA_PUT(skb, RTA_PRIORITY, 4, &rt->rt6i_metric);
	ci.rta_lastuse = jiffies_to_clock_t(jiffies - rt->u.dst.lastuse);
	if (rt->rt6i_expires)
		ci.rta_expires = jiffies_to_clock_t(rt->rt6i_expires - jiffies);
	else
		ci.rta_expires = 0;
	ci.rta_used = rt->u.dst.__use;
	ci.rta_clntref = atomic_read(&rt->u.dst.__refcnt);
	ci.rta_error = rt->u.dst.error;
	ci.rta_id = 0;
	ci.rta_ts = 0;
	ci.rta_tsage = 0;
	RTA_PUT(skb, RTA_CACHEINFO, sizeof(ci), &ci);
	nlh->nlmsg_len = skb->tail - b;
	return skb->len;

nlmsg_failure:
rtattr_failure:
	/* Roll back the partially written message. */
	skb_trim(skb, b - skb->data);
	return -1;
}
1660 
1661 static int rt6_dump_route(struct rt6_info *rt, void *p_arg)
1662 {
1663 	struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
1664 	int prefix;
1665 
1666 	if (arg->cb->nlh->nlmsg_len >= NLMSG_LENGTH(sizeof(struct rtmsg))) {
1667 		struct rtmsg *rtm = NLMSG_DATA(arg->cb->nlh);
1668 		prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
1669 	} else
1670 		prefix = 0;
1671 
1672 	return rt6_fill_node(arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
1673 		     NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
1674 		     prefix, NLM_F_MULTI);
1675 }
1676 
1677 static int fib6_dump_node(struct fib6_walker_t *w)
1678 {
1679 	int res;
1680 	struct rt6_info *rt;
1681 
1682 	for (rt = w->leaf; rt; rt = rt->u.next) {
1683 		res = rt6_dump_route(rt, w->args);
1684 		if (res < 0) {
1685 			/* Frame is full, suspend walking */
1686 			w->leaf = rt;
1687 			return 1;
1688 		}
1689 		BUG_TRAP(res!=0);
1690 	}
1691 	w->leaf = NULL;
1692 	return 0;
1693 }
1694 
/*
 *	Tear down dump state: unlink and free the walker stashed in
 *	cb->args[0], then restore the caller's original done callback
 *	from cb->args[1].
 */
static void fib6_dump_end(struct netlink_callback *cb)
{
	struct fib6_walker_t *w = (void*)cb->args[0];

	if (w) {
		cb->args[0] = 0;
		fib6_walker_unlink(w);
		kfree(w);
	}
	cb->done = (void*)cb->args[1];
	cb->args[1] = 0;
}
1707 
/* Netlink ->done hook: clean up walker state, then chain to the original
 * done callback (restored into cb->done by fib6_dump_end), if any. */
static int fib6_dump_done(struct netlink_callback *cb)
{
	fib6_dump_end(cb);
	return cb->done ? cb->done(cb) : 0;
}
1713 
/*
 *	Netlink dump entry point for the IPv6 routing table.  Walker state
 *	is kept in cb->args[0] across invocations so a dump that fills the
 *	frame can be resumed; cb->args[1] preserves the caller's original
 *	done callback.
 */
int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct rt6_rtnl_dump_arg arg;
	struct fib6_walker_t *w;
	int res;

	arg.skb = skb;
	arg.cb = cb;

	w = (void*)cb->args[0];
	if (w == NULL) {
		/* New dump:
		 *
		 * 1. hook callback destructor.
		 */
		cb->args[1] = (long)cb->done;
		cb->done = fib6_dump_done;

		/*
		 * 2. allocate and initialize walker.
		 */
		w = kmalloc(sizeof(*w), GFP_ATOMIC);
		if (w == NULL)
			return -ENOMEM;
		RT6_TRACE("dump<%p", w);
		memset(w, 0, sizeof(*w));
		w->root = &ip6_routing_table;
		w->func = fib6_dump_node;
		w->args = &arg;
		cb->args[0] = (long)w;
		read_lock_bh(&rt6_lock);
		res = fib6_walk(w);
		read_unlock_bh(&rt6_lock);
	} else {
		/* Continuation: arg lives on this stack frame, so the
		 * walker's pointer must be refreshed before resuming.
		 */
		w->args = &arg;
		read_lock_bh(&rt6_lock);
		res = fib6_walk_continue(w);
		read_unlock_bh(&rt6_lock);
	}
#if RT6_DEBUG >= 3
	if (res <= 0 && skb->len == 0)
		RT6_TRACE("%p>dump end\n", w);
#endif
	res = res < 0 ? res : skb->len;
	/* res < 0 is an error. (really, impossible)
	   res == 0 means that dump is complete, but skb still can contain data.
	   res > 0 dump is not complete, but frame is full.
	 */
	/* Destroy walker, if dump of this table is complete. */
	if (res <= 0)
		fib6_dump_end(cb);
	return res;
}
1767 
/*
 *	RTM_GETROUTE handler: resolve the route for the requested flow and
 *	unicast a single RTM_NEWROUTE reply back to the requester.
 */
int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
{
	struct rtattr **rta = arg;
	int iif = 0;
	int err = -ENOBUFS;
	struct sk_buff *skb;
	struct flowi fl;
	struct rt6_info *rt;

	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
	if (skb == NULL)
		goto out;

	/* Reserve room for dummy headers, this skb can pass
	   through good chunk of routing engine.
	 */
	skb->mac.raw = skb->data;
	skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));

	/* Build the flow key from the optional SRC/DST attributes. */
	memset(&fl, 0, sizeof(fl));
	if (rta[RTA_SRC-1])
		ipv6_addr_copy(&fl.fl6_src,
			       (struct in6_addr*)RTA_DATA(rta[RTA_SRC-1]));
	if (rta[RTA_DST-1])
		ipv6_addr_copy(&fl.fl6_dst,
			       (struct in6_addr*)RTA_DATA(rta[RTA_DST-1]));

	if (rta[RTA_IIF-1])
		memcpy(&iif, RTA_DATA(rta[RTA_IIF-1]), sizeof(int));

	if (iif) {
		/* Only validates that the input interface exists; the
		 * device pointer itself is not used further.
		 */
		struct net_device *dev;
		dev = __dev_get_by_index(iif);
		if (!dev) {
			err = -ENODEV;
			goto out_free;
		}
	}

	fl.oif = 0;
	if (rta[RTA_OIF-1])
		memcpy(&fl.oif, RTA_DATA(rta[RTA_OIF-1]), sizeof(int));

	/* NOTE(review): rt is used without a NULL check -- presumably
	 * ip6_route_output() always returns a dst (e.g. the null entry
	 * on failure); confirm.
	 */
	rt = (struct rt6_info*)ip6_route_output(NULL, &fl);

	/* The reply skb takes over the route reference. */
	skb->dst = &rt->u.dst;

	NETLINK_CB(skb).dst_pid = NETLINK_CB(in_skb).pid;
	err = rt6_fill_node(skb, rt,
			    &fl.fl6_dst, &fl.fl6_src,
			    iif,
			    RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
			    nlh->nlmsg_seq, 0, 0);
	if (err < 0) {
		err = -EMSGSIZE;
		goto out_free;
	}

	err = netlink_unicast(rtnl, skb, NETLINK_CB(in_skb).pid, MSG_DONTWAIT);
	if (err > 0)
		err = 0;
out:
	return err;
out_free:
	kfree_skb(skb);
	goto out;
}
1835 
/*
 *	Broadcast a route change notification (@event, e.g. RTM_NEWROUTE or
 *	RTM_DELROUTE) for @rt to the RTNLGRP_IPV6_ROUTE netlink group.
 *	pid/seq are taken from the triggering request when one is supplied.
 */
void inet6_rt_notify(int event, struct rt6_info *rt, struct nlmsghdr *nlh,
			struct netlink_skb_parms *req)
{
	struct sk_buff *skb;
	int size = NLMSG_SPACE(sizeof(struct rtmsg)+256);
	u32 pid = current->pid;
	u32 seq = 0;

	if (req)
		pid = req->pid;
	if (nlh)
		seq = nlh->nlmsg_seq;

	skb = alloc_skb(size, gfp_any());
	if (!skb) {
		/* Let listeners know a notification was dropped. */
		netlink_set_err(rtnl, 0, RTNLGRP_IPV6_ROUTE, ENOBUFS);
		return;
	}
	if (rt6_fill_node(skb, rt, NULL, NULL, 0, event, pid, seq, 0, 0) < 0) {
		kfree_skb(skb);
		netlink_set_err(rtnl, 0, RTNLGRP_IPV6_ROUTE, EINVAL);
		return;
	}
	NETLINK_CB(skb).dst_group = RTNLGRP_IPV6_ROUTE;
	netlink_broadcast(rtnl, skb, 0, RTNLGRP_IPV6_ROUTE, gfp_any());
}
1862 
1863 /*
1864  *	/proc
1865  */
1866 
1867 #ifdef CONFIG_PROC_FS
1868 
/* Fixed width of one /proc/net/ipv6_route line: 32 hex dst + 4 plen,
 * 32 hex src + 4 plen, 32 hex nexthop, 40 for the four counters/flags,
 * 5 for the device name field, 1 newline. */
#define RT6_INFO_LEN (32 + 4 + 32 + 4 + 32 + 40 + 5 + 1)

/* Cursor state threaded through rt6_info_route() by rt6_proc_info(). */
struct rt6_proc_arg
{
	char *buffer;	/* output buffer */
	int offset;	/* byte offset requested by the reader */
	int length;	/* capacity of the buffer */
	int skip;	/* whole entries skipped so far */
	int len;	/* bytes written so far */
};
1879 
/*
 *	Emit one /proc/net/ipv6_route line for @rt: dst/plen, src/plen
 *	(zeroes without CONFIG_IPV6_SUBTREES), nexthop, then metric,
 *	refcount, use count, flags and device name.
 */
static int rt6_info_route(struct rt6_info *rt, void *p_arg)
{
	struct rt6_proc_arg *arg = (struct rt6_proc_arg *) p_arg;
	int i;

	/* Skip whole entries lying before the requested read offset. */
	if (arg->skip < arg->offset / RT6_INFO_LEN) {
		arg->skip++;
		return 0;
	}

	/* Stop once the buffer is full.  NOTE(review): this does not
	 * reserve room for the entry about to be written -- presumably
	 * the caller provides RT6_INFO_LEN of slack; confirm.
	 */
	if (arg->len >= arg->length)
		return 0;

	for (i=0; i<16; i++) {
		sprintf(arg->buffer + arg->len, "%02x",
			rt->rt6i_dst.addr.s6_addr[i]);
		arg->len += 2;
	}
	arg->len += sprintf(arg->buffer + arg->len, " %02x ",
			    rt->rt6i_dst.plen);

#ifdef CONFIG_IPV6_SUBTREES
	for (i=0; i<16; i++) {
		sprintf(arg->buffer + arg->len, "%02x",
			rt->rt6i_src.addr.s6_addr[i]);
		arg->len += 2;
	}
	arg->len += sprintf(arg->buffer + arg->len, " %02x ",
			    rt->rt6i_src.plen);
#else
	sprintf(arg->buffer + arg->len,
		"00000000000000000000000000000000 00 ");
	arg->len += 36;
#endif

	if (rt->rt6i_nexthop) {
		for (i=0; i<16; i++) {
			sprintf(arg->buffer + arg->len, "%02x",
				rt->rt6i_nexthop->primary_key[i]);
			arg->len += 2;
		}
	} else {
		/* No nexthop: print an all-zero address placeholder. */
		sprintf(arg->buffer + arg->len,
			"00000000000000000000000000000000");
		arg->len += 32;
	}
	arg->len += sprintf(arg->buffer + arg->len,
			    " %08x %08x %08x %08x %8s\n",
			    rt->rt6i_metric, atomic_read(&rt->u.dst.__refcnt),
			    rt->u.dst.__use, rt->rt6i_flags,
			    rt->rt6i_dev ? rt->rt6i_dev->name : "");
	return 0;
}
1933 
/*
 *	Old-style /proc read handler for /proc/net/ipv6_route: fills
 *	@buffer starting at @offset, one RT6_INFO_LEN-byte record per
 *	route, and returns the number of bytes produced.
 */
static int rt6_proc_info(char *buffer, char **start, off_t offset, int length)
{
	struct rt6_proc_arg arg;
	arg.buffer = buffer;
	arg.offset = offset;
	arg.length = length;
	arg.skip = 0;
	arg.len = 0;

	read_lock_bh(&rt6_lock);
	fib6_clean_tree(&ip6_routing_table, rt6_info_route, 0, &arg);
	read_unlock_bh(&rt6_lock);

	/* Whole records before the offset were skipped; adjust *start and
	 * the length for a partial position inside the first record.
	 */
	*start = buffer;
	if (offset)
		*start += offset % RT6_INFO_LEN;

	arg.len -= offset % RT6_INFO_LEN;

	/* Clamp to the reader's buffer; never return a negative count. */
	if (arg.len > length)
		arg.len = length;
	if (arg.len < 0)
		arg.len = 0;

	return arg.len;
}
1960 
/* seq_file show handler for /proc/net/rt6_stats: one line of hex counters
 * (fib node/route counts, allocation stats, live dst entries, discards). */
static int rt6_stats_seq_show(struct seq_file *seq, void *v)
{
	seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
		      rt6_stats.fib_nodes, rt6_stats.fib_route_nodes,
		      rt6_stats.fib_rt_alloc, rt6_stats.fib_rt_entries,
		      rt6_stats.fib_rt_cache,
		      atomic_read(&ip6_dst_ops.entries),
		      rt6_stats.fib_discarded_routes);

	return 0;
}
1972 
/* open() hook: single-record seq_file, no private iterator state. */
static int rt6_stats_seq_open(struct inode *inode, struct file *file)
{
	return single_open(file, rt6_stats_seq_show, NULL);
}
1977 
/* File operations for /proc/net/rt6_stats (read-only, seq_file based). */
static struct file_operations rt6_stats_seq_fops = {
	.owner	 = THIS_MODULE,
	.open	 = rt6_stats_seq_open,
	.read	 = seq_read,
	.llseek	 = seq_lseek,
	.release = single_release,
};
1985 #endif	/* CONFIG_PROC_FS */
1986 
1987 #ifdef CONFIG_SYSCTL
1988 
/* Value written to net.ipv6.route.flush; interpreted as a GC delay. */
static int flush_delay;

/*
 *	Sysctl handler for net.ipv6.route.flush.  Write-only: a write parses
 *	the integer into flush_delay and triggers a fib GC run (a
 *	non-positive delay passes ~0UL instead); reads return -EINVAL.
 */
static
int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write, struct file * filp,
			      void __user *buffer, size_t *lenp, loff_t *ppos)
{
	if (write) {
		proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
		fib6_run_gc(flush_delay <= 0 ? ~0UL : (unsigned long)flush_delay);
		return 0;
	} else
		return -EINVAL;
}
2002 
2003 ctl_table ipv6_route_table[] = {
2004         {
2005 		.ctl_name	=	NET_IPV6_ROUTE_FLUSH,
2006 		.procname	=	"flush",
2007          	.data		=	&flush_delay,
2008 		.maxlen		=	sizeof(int),
2009 		.mode		=	0200,
2010          	.proc_handler	=	&ipv6_sysctl_rtcache_flush
2011 	},
2012 	{
2013 		.ctl_name	=	NET_IPV6_ROUTE_GC_THRESH,
2014 		.procname	=	"gc_thresh",
2015          	.data		=	&ip6_dst_ops.gc_thresh,
2016 		.maxlen		=	sizeof(int),
2017 		.mode		=	0644,
2018          	.proc_handler	=	&proc_dointvec,
2019 	},
2020 	{
2021 		.ctl_name	=	NET_IPV6_ROUTE_MAX_SIZE,
2022 		.procname	=	"max_size",
2023          	.data		=	&ip6_rt_max_size,
2024 		.maxlen		=	sizeof(int),
2025 		.mode		=	0644,
2026          	.proc_handler	=	&proc_dointvec,
2027 	},
2028 	{
2029 		.ctl_name	=	NET_IPV6_ROUTE_GC_MIN_INTERVAL,
2030 		.procname	=	"gc_min_interval",
2031          	.data		=	&ip6_rt_gc_min_interval,
2032 		.maxlen		=	sizeof(int),
2033 		.mode		=	0644,
2034          	.proc_handler	=	&proc_dointvec_jiffies,
2035 		.strategy	=	&sysctl_jiffies,
2036 	},
2037 	{
2038 		.ctl_name	=	NET_IPV6_ROUTE_GC_TIMEOUT,
2039 		.procname	=	"gc_timeout",
2040          	.data		=	&ip6_rt_gc_timeout,
2041 		.maxlen		=	sizeof(int),
2042 		.mode		=	0644,
2043          	.proc_handler	=	&proc_dointvec_jiffies,
2044 		.strategy	=	&sysctl_jiffies,
2045 	},
2046 	{
2047 		.ctl_name	=	NET_IPV6_ROUTE_GC_INTERVAL,
2048 		.procname	=	"gc_interval",
2049          	.data		=	&ip6_rt_gc_interval,
2050 		.maxlen		=	sizeof(int),
2051 		.mode		=	0644,
2052          	.proc_handler	=	&proc_dointvec_jiffies,
2053 		.strategy	=	&sysctl_jiffies,
2054 	},
2055 	{
2056 		.ctl_name	=	NET_IPV6_ROUTE_GC_ELASTICITY,
2057 		.procname	=	"gc_elasticity",
2058          	.data		=	&ip6_rt_gc_elasticity,
2059 		.maxlen		=	sizeof(int),
2060 		.mode		=	0644,
2061          	.proc_handler	=	&proc_dointvec_jiffies,
2062 		.strategy	=	&sysctl_jiffies,
2063 	},
2064 	{
2065 		.ctl_name	=	NET_IPV6_ROUTE_MTU_EXPIRES,
2066 		.procname	=	"mtu_expires",
2067          	.data		=	&ip6_rt_mtu_expires,
2068 		.maxlen		=	sizeof(int),
2069 		.mode		=	0644,
2070          	.proc_handler	=	&proc_dointvec_jiffies,
2071 		.strategy	=	&sysctl_jiffies,
2072 	},
2073 	{
2074 		.ctl_name	=	NET_IPV6_ROUTE_MIN_ADVMSS,
2075 		.procname	=	"min_adv_mss",
2076          	.data		=	&ip6_rt_min_advmss,
2077 		.maxlen		=	sizeof(int),
2078 		.mode		=	0644,
2079          	.proc_handler	=	&proc_dointvec_jiffies,
2080 		.strategy	=	&sysctl_jiffies,
2081 	},
2082 	{
2083 		.ctl_name	=	NET_IPV6_ROUTE_GC_MIN_INTERVAL_MS,
2084 		.procname	=	"gc_min_interval_ms",
2085          	.data		=	&ip6_rt_gc_min_interval,
2086 		.maxlen		=	sizeof(int),
2087 		.mode		=	0644,
2088          	.proc_handler	=	&proc_dointvec_ms_jiffies,
2089 		.strategy	=	&sysctl_ms_jiffies,
2090 	},
2091 	{ .ctl_name = 0 }
2092 };
2093 
2094 #endif
2095 
/*
 *	Boot-time initialization: create the rt6_info slab cache (panic on
 *	failure -- routing cannot work without it), initialize the fib
 *	tree, and register the /proc and xfrm hooks when configured.
 */
void __init ip6_route_init(void)
{
	struct proc_dir_entry *p;

	ip6_dst_ops.kmem_cachep = kmem_cache_create("ip6_dst_cache",
						     sizeof(struct rt6_info),
						     0, SLAB_HWCACHE_ALIGN,
						     NULL, NULL);
	if (!ip6_dst_ops.kmem_cachep)
		panic("cannot create ip6_dst_cache");

	fib6_init();
#ifdef 	CONFIG_PROC_FS
	p = proc_net_create("ipv6_route", 0, rt6_proc_info);
	if (p)
		p->owner = THIS_MODULE;

	proc_net_fops_create("rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
#endif
#ifdef CONFIG_XFRM
	xfrm6_init();
#endif
}
2119 
/*
 *	Teardown counterpart of ip6_route_init(): remove the /proc entries
 *	and xfrm hooks, flush all routes (rt6_ifdown(NULL) matches every
 *	device), stop the fib garbage collector and destroy the slab cache.
 */
void ip6_route_cleanup(void)
{
#ifdef CONFIG_PROC_FS
	proc_net_remove("ipv6_route");
	proc_net_remove("rt6_stats");
#endif
#ifdef CONFIG_XFRM
	xfrm6_fini();
#endif
	rt6_ifdown(NULL);
	fib6_gc_cleanup();
	kmem_cache_destroy(ip6_dst_ops.kmem_cachep);
}
2133