xref: /linux/net/ipv6/route.c (revision d8327c784b51b57dac2c26cfad87dce0d68dfd98)
1 /*
2  *	Linux INET6 implementation
3  *	FIB front-end.
4  *
5  *	Authors:
6  *	Pedro Roque		<roque@di.fc.ul.pt>
7  *
8  *	$Id: route.c,v 1.56 2001/10/31 21:55:55 davem Exp $
9  *
10  *	This program is free software; you can redistribute it and/or
11  *      modify it under the terms of the GNU General Public License
12  *      as published by the Free Software Foundation; either version
13  *      2 of the License, or (at your option) any later version.
14  */
15 
16 /*	Changes:
17  *
18  *	YOSHIFUJI Hideaki @USAGI
19  *		reworked default router selection.
20  *		- respect outgoing interface
21  *		- select from (probably) reachable routers (i.e.
22  *		routers in REACHABLE, STALE, DELAY or PROBE states).
23  *		- always select the same router if it is (probably)
24  *		reachable.  otherwise, round-robin the list.
25  */
26 
27 #include <linux/capability.h>
28 #include <linux/config.h>
29 #include <linux/errno.h>
30 #include <linux/types.h>
31 #include <linux/times.h>
32 #include <linux/socket.h>
33 #include <linux/sockios.h>
34 #include <linux/net.h>
35 #include <linux/route.h>
36 #include <linux/netdevice.h>
37 #include <linux/in6.h>
38 #include <linux/init.h>
39 #include <linux/netlink.h>
40 #include <linux/if_arp.h>
41 
42 #ifdef 	CONFIG_PROC_FS
43 #include <linux/proc_fs.h>
44 #include <linux/seq_file.h>
45 #endif
46 
47 #include <net/snmp.h>
48 #include <net/ipv6.h>
49 #include <net/ip6_fib.h>
50 #include <net/ip6_route.h>
51 #include <net/ndisc.h>
52 #include <net/addrconf.h>
53 #include <net/tcp.h>
54 #include <linux/rtnetlink.h>
55 #include <net/dst.h>
56 #include <net/xfrm.h>
57 
58 #include <asm/uaccess.h>
59 
60 #ifdef CONFIG_SYSCTL
61 #include <linux/sysctl.h>
62 #endif
63 
/* Set to 3 to get tracing. */
#define RT6_DEBUG 2

/* Debug helpers: compiled away entirely unless RT6_DEBUG >= 3. */
#if RT6_DEBUG >= 3
#define RDBG(x) printk x
#define RT6_TRACE(x...) printk(KERN_DEBUG x)
#else
#define RDBG(x)
#define RT6_TRACE(x...) do { ; } while (0)
#endif
74 
75 
/* Routing-cache GC and PMTU tunables (adjustable via sysctl elsewhere). */
static int ip6_rt_max_size = 4096;		/* hard cap on dst entries before forced GC */
static int ip6_rt_gc_min_interval = HZ / 2;	/* minimum delay between GC runs */
static int ip6_rt_gc_timeout = 60*HZ;		/* base value for the adaptive "expire" */
int ip6_rt_gc_interval = 30*HZ;			/* periodic GC interval */
static int ip6_rt_gc_elasticity = 9;		/* decay shift applied to "expire" each GC call */
static int ip6_rt_mtu_expires = 10*60*HZ;	/* lifetime of a learned path MTU (RFC 1981) */
static int ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;	/* floor for advertised MSS */
83 
/* Forward declarations for the dst_ops callbacks and packet handlers below. */
static struct rt6_info * ip6_rt_copy(struct rt6_info *ort);
static struct dst_entry	*ip6_dst_check(struct dst_entry *dst, u32 cookie);
static struct dst_entry *ip6_negative_advice(struct dst_entry *);
static void		ip6_dst_destroy(struct dst_entry *);
static void		ip6_dst_ifdown(struct dst_entry *,
				       struct net_device *dev, int how);
static int		 ip6_dst_gc(void);

static int		ip6_pkt_discard(struct sk_buff *skb);
static int		ip6_pkt_discard_out(struct sk_buff *skb);
static void		ip6_link_failure(struct sk_buff *skb);
static void		ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
96 
/* dst_ops vtable for all IPv6 dst entries allocated by this file. */
static struct dst_ops ip6_dst_ops = {
	.family			=	AF_INET6,
	.protocol		=	__constant_htons(ETH_P_IPV6),
	.gc			=	ip6_dst_gc,
	.gc_thresh		=	1024,	/* GC becomes eager above this many entries */
	.check			=	ip6_dst_check,
	.destroy		=	ip6_dst_destroy,
	.ifdown			=	ip6_dst_ifdown,
	.negative_advice	=	ip6_negative_advice,
	.link_failure		=	ip6_link_failure,
	.update_pmtu		=	ip6_rt_update_pmtu,
	.entry_size		=	sizeof(struct rt6_info),
};
110 
/*
 * Permanent "reject" route returned when no real route matches.
 * Packets hitting it are discarded with -ENETUNREACH; its refcount
 * starts at 1 so it is never freed.
 */
struct rt6_info ip6_null_entry = {
	.u = {
		.dst = {
			.__refcnt	= ATOMIC_INIT(1),
			.__use		= 1,
			.dev		= &loopback_dev,
			.obsolete	= -1,
			.error		= -ENETUNREACH,
			.metrics	= { [RTAX_HOPLIMIT - 1] = 255, },
			.input		= ip6_pkt_discard,
			.output		= ip6_pkt_discard_out,
			.ops		= &ip6_dst_ops,
			.path		= (struct dst_entry*)&ip6_null_entry,
		}
	},
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_metric	= ~(u32) 0,	/* worst possible metric */
	.rt6i_ref	= ATOMIC_INIT(1),
};
130 
/* Root node of the IPv6 fib; an empty tree resolves to the reject entry. */
struct fib6_node ip6_routing_table = {
	.leaf		= &ip6_null_entry,
	.fn_flags	= RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO,
};

/* Protects all the ip6 fib */

DEFINE_RWLOCK(rt6_lock);
139 
140 
141 /* allocate dst with ip6_dst_ops */
142 static __inline__ struct rt6_info *ip6_dst_alloc(void)
143 {
144 	return (struct rt6_info *)dst_alloc(&ip6_dst_ops);
145 }
146 
147 static void ip6_dst_destroy(struct dst_entry *dst)
148 {
149 	struct rt6_info *rt = (struct rt6_info *)dst;
150 	struct inet6_dev *idev = rt->rt6i_idev;
151 
152 	if (idev != NULL) {
153 		rt->rt6i_idev = NULL;
154 		in6_dev_put(idev);
155 	}
156 }
157 
158 static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
159 			   int how)
160 {
161 	struct rt6_info *rt = (struct rt6_info *)dst;
162 	struct inet6_dev *idev = rt->rt6i_idev;
163 
164 	if (dev != &loopback_dev && idev != NULL && idev->dev == dev) {
165 		struct inet6_dev *loopback_idev = in6_dev_get(&loopback_dev);
166 		if (loopback_idev != NULL) {
167 			rt->rt6i_idev = loopback_idev;
168 			in6_dev_put(idev);
169 		}
170 	}
171 }
172 
173 static __inline__ int rt6_check_expired(const struct rt6_info *rt)
174 {
175 	return (rt->rt6i_flags & RTF_EXPIRES &&
176 		time_after(jiffies, rt->rt6i_expires));
177 }
178 
179 /*
180  *	Route lookup. Any rt6_lock is implied.
181  */
182 
183 static __inline__ struct rt6_info *rt6_device_match(struct rt6_info *rt,
184 						    int oif,
185 						    int strict)
186 {
187 	struct rt6_info *local = NULL;
188 	struct rt6_info *sprt;
189 
190 	if (oif) {
191 		for (sprt = rt; sprt; sprt = sprt->u.next) {
192 			struct net_device *dev = sprt->rt6i_dev;
193 			if (dev->ifindex == oif)
194 				return sprt;
195 			if (dev->flags & IFF_LOOPBACK) {
196 				if (sprt->rt6i_idev == NULL ||
197 				    sprt->rt6i_idev->dev->ifindex != oif) {
198 					if (strict && oif)
199 						continue;
200 					if (local && (!oif ||
201 						      local->rt6i_idev->dev->ifindex == oif))
202 						continue;
203 				}
204 				local = sprt;
205 			}
206 		}
207 
208 		if (local)
209 			return local;
210 
211 		if (strict)
212 			return &ip6_null_entry;
213 	}
214 	return rt;
215 }
216 
217 /*
218  *	pointer to the last default router chosen. BH is disabled locally.
219  */
220 static struct rt6_info *rt6_dflt_pointer;
221 static DEFINE_SPINLOCK(rt6_dflt_lock);
222 
223 void rt6_reset_dflt_pointer(struct rt6_info *rt)
224 {
225 	spin_lock_bh(&rt6_dflt_lock);
226 	if (rt == NULL || rt == rt6_dflt_pointer) {
227 		RT6_TRACE("reset default router: %p->NULL\n", rt6_dflt_pointer);
228 		rt6_dflt_pointer = NULL;
229 	}
230 	spin_unlock_bh(&rt6_dflt_lock);
231 }
232 
/* Default Router Selection (RFC 2461 6.3.6) */
/*
 * Score every default router sharing this fib node:
 *   +8  matches the requested interface (or no interface was given)
 *   +4  it is the router we used last time (stickiness)
 *   +3/+2/+1 by neighbour reachability (REACHABLE / STALE-DELAY-PROBE /
 *            NOARP-PERMANENT); INCOMPLETE routers are skipped.
 * A score >= 12 (iface + sticky + reachable) short-circuits the scan.
 * If nothing scores, round-robin from the previous choice; as a last
 * resort fall back to an addrconf default route or the reject entry.
 */
static struct rt6_info *rt6_best_dflt(struct rt6_info *rt, int oif)
{
	struct rt6_info *match = NULL;
	struct rt6_info *sprt;
	int mpri = 0;

	for (sprt = rt; sprt; sprt = sprt->u.next) {
		struct neighbour *neigh;
		int m = 0;

		if (!oif ||
		    (sprt->rt6i_dev &&
		     sprt->rt6i_dev->ifindex == oif))
			m += 8;

		if (rt6_check_expired(sprt))
			continue;

		if (sprt == rt6_dflt_pointer)
			m += 4;

		if ((neigh = sprt->rt6i_nexthop) != NULL) {
			read_lock_bh(&neigh->lock);
			switch (neigh->nud_state) {
			case NUD_REACHABLE:
				m += 3;
				break;

			case NUD_STALE:
			case NUD_DELAY:
			case NUD_PROBE:
				m += 2;
				break;

			case NUD_NOARP:
			case NUD_PERMANENT:
				m += 1;
				break;

			case NUD_INCOMPLETE:
			default:
				/* not known reachable at all: not a candidate */
				read_unlock_bh(&neigh->lock);
				continue;
			}
			read_unlock_bh(&neigh->lock);
		} else {
			continue;
		}

		if (m > mpri || m >= 12) {
			match = sprt;
			mpri = m;
			if (m >= 12) {
				/* we choose the last default router if it
				 * is in (probably) reachable state.
				 * If route changed, we should do pmtu
				 * discovery. --yoshfuji
				 */
				break;
			}
		}
	}

	spin_lock(&rt6_dflt_lock);
	if (!match) {
		/*
		 *	No default routers are known to be reachable.
		 *	SHOULD round robin
		 */
		if (rt6_dflt_pointer) {
			/* First try the routers after the previous choice... */
			for (sprt = rt6_dflt_pointer->u.next;
			     sprt; sprt = sprt->u.next) {
				if (sprt->u.dst.obsolete <= 0 &&
				    sprt->u.dst.error == 0 &&
				    !rt6_check_expired(sprt)) {
					match = sprt;
					break;
				}
			}
			/* ...then wrap around from the head up to (and
			 * including) the previous choice. */
			for (sprt = rt;
			     !match && sprt;
			     sprt = sprt->u.next) {
				if (sprt->u.dst.obsolete <= 0 &&
				    sprt->u.dst.error == 0 &&
				    !rt6_check_expired(sprt)) {
					match = sprt;
					break;
				}
				if (sprt == rt6_dflt_pointer)
					break;
			}
		}
	}

	if (match) {
		if (rt6_dflt_pointer != match)
			RT6_TRACE("changed default router: %p->%p\n",
				  rt6_dflt_pointer, match);
		rt6_dflt_pointer = match;	/* remember for stickiness/round-robin */
	}
	spin_unlock(&rt6_dflt_lock);

	if (!match) {
		/*
		 * Last Resort: if no default routers found,
		 * use addrconf default route.
		 * We don't record this route.
		 */
		for (sprt = ip6_routing_table.leaf;
		     sprt; sprt = sprt->u.next) {
			if (!rt6_check_expired(sprt) &&
			    (sprt->rt6i_flags & RTF_DEFAULT) &&
			    (!oif ||
			     (sprt->rt6i_dev &&
			      sprt->rt6i_dev->ifindex == oif))) {
				match = sprt;
				break;
			}
		}
		if (!match) {
			/* no default route.  give up. */
			match = &ip6_null_entry;
		}
	}

	return match;
}
361 
/*
 * Public route lookup.  Finds the best match for @daddr (optionally
 * constrained by @saddr, output interface @oif and @strict device
 * matching).  Returns a held rt6_info, or NULL when only an error
 * entry (e.g. the reject route) was found.  Caller must dst_release().
 */
struct rt6_info *rt6_lookup(struct in6_addr *daddr, struct in6_addr *saddr,
			    int oif, int strict)
{
	struct fib6_node *fn;
	struct rt6_info *rt;

	read_lock_bh(&rt6_lock);
	fn = fib6_lookup(&ip6_routing_table, daddr, saddr);
	rt = rt6_device_match(fn->leaf, oif, strict);
	dst_hold(&rt->u.dst);		/* take a reference while still locked */
	rt->u.dst.__use++;
	read_unlock_bh(&rt6_lock);

	rt->u.dst.lastuse = jiffies;
	if (rt->u.dst.error == 0)
		return rt;
	dst_release(&rt->u.dst);	/* error entry: drop it and report NULL */
	return NULL;
}
381 
/* ip6_ins_rt is called with FREE rt6_lock.
   It takes a new route entry; if the addition fails for any reason the
   route is freed. In any case, if the caller does not hold a reference,
   the entry may be destroyed.
 */

int ip6_ins_rt(struct rt6_info *rt, struct nlmsghdr *nlh,
		void *_rtattr, struct netlink_skb_parms *req)
{
	int err;

	/* fib6_add() consumes @rt on failure. */
	write_lock_bh(&rt6_lock);
	err = fib6_add(&ip6_routing_table, rt, nlh, _rtattr, req);
	write_unlock_bh(&rt6_lock);

	return err;
}
399 
/* No rt6_lock! If COW failed, the function returns dead route entry
   with dst->error set to errno value.
 */

/*
 * Copy-on-write: clone @ort into a /128 host cache route for @daddr
 * (and @saddr under CONFIG_IPV6_SUBTREES), bind its nexthop neighbour
 * and insert it into the fib.  Always returns a held entry; on failure
 * dst.error carries the errno, and on OOM the reject entry is returned.
 */
static struct rt6_info *rt6_cow(struct rt6_info *ort, struct in6_addr *daddr,
				struct in6_addr *saddr, struct netlink_skb_parms *req)
{
	int err;
	struct rt6_info *rt;

	/*
	 *	Clone the route.
	 */

	rt = ip6_rt_copy(ort);

	if (rt) {
		if (!(rt->rt6i_flags&RTF_GATEWAY)) {
			/* Connected route: the destination itself is the
			 * nexthop.  Mark anycast when daddr equals a non-host
			 * prefix address. */
			if (rt->rt6i_dst.plen != 128 &&
			    ipv6_addr_equal(&rt->rt6i_dst.addr, daddr))
				rt->rt6i_flags |= RTF_ANYCAST;
			ipv6_addr_copy(&rt->rt6i_gateway, daddr);
		}

		/* Narrow the clone down to a host route for this destination. */
		ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
		rt->rt6i_dst.plen = 128;
		rt->rt6i_flags |= RTF_CACHE;
		rt->u.dst.flags |= DST_HOST;

#ifdef CONFIG_IPV6_SUBTREES
		if (rt->rt6i_src.plen && saddr) {
			ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
			rt->rt6i_src.plen = 128;
		}
#endif

		rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);

		/* Hold before insert: fib6_add consumes one reference. */
		dst_hold(&rt->u.dst);

		err = ip6_ins_rt(rt, NULL, NULL, req);
		if (err == 0)
			return rt;

		/* Insert failed (e.g. -EEXIST race): hand back a dead entry. */
		rt->u.dst.error = err;

		return rt;
	}
	dst_hold(&ip6_null_entry.u.dst);
	return &ip6_null_entry;
}
451 
/*
 * Lookup backtracking: when a strict lookup resolved to the reject
 * entry, climb towards the tree root and restart matching at the first
 * ancestor that carries route info; stop (holding the reject entry) at
 * the root.  Expects "fn", "rt", "strict" and labels "out"/"restart"
 * in the enclosing function.
 */
#define BACKTRACK() \
if (rt == &ip6_null_entry && strict) { \
       while ((fn = fn->parent) != NULL) { \
		if (fn->fn_flags & RTN_ROOT) { \
			dst_hold(&rt->u.dst); \
			goto out; \
		} \
		if (fn->fn_flags & RTN_RTINFO) \
			goto restart; \
	} \
}
463 
464 
/*
 * Input-path route resolution: look up skb's destination in the fib,
 * COW a per-destination cache clone when the selected route has no
 * nexthop neighbour yet, and attach the held result to skb->dst.
 */
void ip6_route_input(struct sk_buff *skb)
{
	struct fib6_node *fn;
	struct rt6_info *rt;
	int strict;
	int attempts = 3;	/* bounded retries on insert races */

	/* Multicast/link-local destinations require a strict device match. */
	strict = ipv6_addr_type(&skb->nh.ipv6h->daddr) & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL);

relookup:
	read_lock_bh(&rt6_lock);

	fn = fib6_lookup(&ip6_routing_table, &skb->nh.ipv6h->daddr,
			 &skb->nh.ipv6h->saddr);

restart:
	rt = fn->leaf;

	if ((rt->rt6i_flags & RTF_CACHE)) {
		/* Already a host cache entry: use it directly. */
		rt = rt6_device_match(rt, skb->dev->ifindex, strict);
		BACKTRACK();
		dst_hold(&rt->u.dst);
		goto out;
	}

	rt = rt6_device_match(rt, skb->dev->ifindex, strict);
	BACKTRACK();

	if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) {
		struct rt6_info *nrt;
		dst_hold(&rt->u.dst);
		read_unlock_bh(&rt6_lock);

		/* Connected route: clone a /128 cache entry for this dst. */
		nrt = rt6_cow(rt, &skb->nh.ipv6h->daddr,
			      &skb->nh.ipv6h->saddr,
			      &NETLINK_CB(skb));

		dst_release(&rt->u.dst);
		rt = nrt;

		if (rt->u.dst.error != -EEXIST || --attempts <= 0)
			goto out2;

		/* Race condition! In the gap, when rt6_lock was
		   released someone could insert this route.  Relookup.
		*/
		dst_release(&rt->u.dst);
		goto relookup;
	}
	dst_hold(&rt->u.dst);

out:
	read_unlock_bh(&rt6_lock);
out2:
	rt->u.dst.lastuse = jiffies;
	rt->u.dst.__use++;
	skb->dst = (struct dst_entry *) rt;
}
523 
/*
 * Output-path route resolution for locally generated packets.  Same
 * structure as ip6_route_input(), plus default-router selection for
 * RTF_DEFAULT leaves.  Returns a held dst (possibly a dead entry with
 * dst.error set); caller releases it.
 */
struct dst_entry * ip6_route_output(struct sock *sk, struct flowi *fl)
{
	struct fib6_node *fn;
	struct rt6_info *rt;
	int strict;
	int attempts = 3;	/* bounded retries on insert races */

	/* Multicast/link-local destinations require a strict device match. */
	strict = ipv6_addr_type(&fl->fl6_dst) & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL);

relookup:
	read_lock_bh(&rt6_lock);

	fn = fib6_lookup(&ip6_routing_table, &fl->fl6_dst, &fl->fl6_src);

restart:
	rt = fn->leaf;

	if ((rt->rt6i_flags & RTF_CACHE)) {
		/* Already a host cache entry: use it directly. */
		rt = rt6_device_match(rt, fl->oif, strict);
		BACKTRACK();
		dst_hold(&rt->u.dst);
		goto out;
	}
	if (rt->rt6i_flags & RTF_DEFAULT) {
		/* Pick the best default router (RFC 2461 6.3.6). */
		if (rt->rt6i_metric >= IP6_RT_PRIO_ADDRCONF)
			rt = rt6_best_dflt(rt, fl->oif);
	} else {
		rt = rt6_device_match(rt, fl->oif, strict);
		BACKTRACK();
	}

	if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) {
		struct rt6_info *nrt;
		dst_hold(&rt->u.dst);
		read_unlock_bh(&rt6_lock);

		/* Connected route: clone a /128 cache entry for this dst. */
		nrt = rt6_cow(rt, &fl->fl6_dst, &fl->fl6_src, NULL);

		dst_release(&rt->u.dst);
		rt = nrt;

		if (rt->u.dst.error != -EEXIST || --attempts <= 0)
			goto out2;

		/* Race condition! In the gap, when rt6_lock was
		   released someone could insert this route.  Relookup.
		*/
		dst_release(&rt->u.dst);
		goto relookup;
	}
	dst_hold(&rt->u.dst);

out:
	read_unlock_bh(&rt6_lock);
out2:
	rt->u.dst.lastuse = jiffies;
	rt->u.dst.__use++;
	return &rt->u.dst;
}
583 
584 
585 /*
586  *	Destination cache support functions
587  */
588 
589 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
590 {
591 	struct rt6_info *rt;
592 
593 	rt = (struct rt6_info *) dst;
594 
595 	if (rt && rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
596 		return dst;
597 
598 	return NULL;
599 }
600 
601 static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
602 {
603 	struct rt6_info *rt = (struct rt6_info *) dst;
604 
605 	if (rt) {
606 		if (rt->rt6i_flags & RTF_CACHE)
607 			ip6_del_rt(rt, NULL, NULL, NULL);
608 		else
609 			dst_release(dst);
610 	}
611 	return NULL;
612 }
613 
614 static void ip6_link_failure(struct sk_buff *skb)
615 {
616 	struct rt6_info *rt;
617 
618 	icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0, skb->dev);
619 
620 	rt = (struct rt6_info *) skb->dst;
621 	if (rt) {
622 		if (rt->rt6i_flags&RTF_CACHE) {
623 			dst_set_expires(&rt->u.dst, 0);
624 			rt->rt6i_flags |= RTF_EXPIRES;
625 		} else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
626 			rt->rt6i_node->fn_sernum = -1;
627 	}
628 }
629 
630 static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
631 {
632 	struct rt6_info *rt6 = (struct rt6_info*)dst;
633 
634 	if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
635 		rt6->rt6i_flags |= RTF_MODIFIED;
636 		if (mtu < IPV6_MIN_MTU) {
637 			mtu = IPV6_MIN_MTU;
638 			dst->metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
639 		}
640 		dst->metrics[RTAX_MTU-1] = mtu;
641 	}
642 }
643 
644 /* Protected by rt6_lock.  */
645 static struct dst_entry *ndisc_dst_gc_list;
646 static int ipv6_get_mtu(struct net_device *dev);
647 
648 static inline unsigned int ipv6_advmss(unsigned int mtu)
649 {
650 	mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
651 
652 	if (mtu < ip6_rt_min_advmss)
653 		mtu = ip6_rt_min_advmss;
654 
655 	/*
656 	 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
657 	 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
658 	 * IPV6_MAXPLEN is also valid and means: "any MSS,
659 	 * rely only on pmtu discovery"
660 	 */
661 	if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
662 		mtu = IPV6_MAXPLEN;
663 	return mtu;
664 }
665 
/*
 * Allocate a dst for neighbour-discovery generated packets.  The entry
 * is NOT inserted in the fib; it is chained on ndisc_dst_gc_list and
 * reclaimed by ndisc_dst_gc() once its refcount drops to zero.
 * Returns a dst with refcnt 1, or NULL on allocation failure.
 */
struct dst_entry *ndisc_dst_alloc(struct net_device *dev,
				  struct neighbour *neigh,
				  struct in6_addr *addr,
				  int (*output)(struct sk_buff *))
{
	struct rt6_info *rt;
	struct inet6_dev *idev = in6_dev_get(dev);

	if (unlikely(idev == NULL))
		return NULL;

	rt = ip6_dst_alloc();
	if (unlikely(rt == NULL)) {
		in6_dev_put(idev);
		goto out;	/* rt is NULL: returns NULL below */
	}

	dev_hold(dev);
	if (neigh)
		neigh_hold(neigh);	/* reuse caller's neighbour */
	else
		neigh = ndisc_get_neigh(dev, addr);

	rt->rt6i_dev	  = dev;
	rt->rt6i_idev     = idev;
	rt->rt6i_nexthop  = neigh;
	atomic_set(&rt->u.dst.__refcnt, 1);
	rt->u.dst.metrics[RTAX_HOPLIMIT-1] = 255;
	rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
	rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
	rt->u.dst.output  = output;

#if 0	/* there's no chance to use these for ndisc */
	rt->u.dst.flags   = ipv6_addr_type(addr) & IPV6_ADDR_UNICAST
				? DST_HOST
				: 0;
	ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
	rt->rt6i_dst.plen = 128;
#endif

	/* Chain on the GC list for later reclamation. */
	write_lock_bh(&rt6_lock);
	rt->u.dst.next = ndisc_dst_gc_list;
	ndisc_dst_gc_list = &rt->u.dst;
	write_unlock_bh(&rt6_lock);

	fib6_force_start_gc();

out:
	return (struct dst_entry *)rt;
}
716 
717 int ndisc_dst_gc(int *more)
718 {
719 	struct dst_entry *dst, *next, **pprev;
720 	int freed;
721 
722 	next = NULL;
723 	pprev = &ndisc_dst_gc_list;
724 	freed = 0;
725 	while ((dst = *pprev) != NULL) {
726 		if (!atomic_read(&dst->__refcnt)) {
727 			*pprev = dst->next;
728 			dst_free(dst);
729 			freed++;
730 		} else {
731 			pprev = &dst->next;
732 			(*more)++;
733 		}
734 	}
735 
736 	return freed;
737 }
738 
/*
 * dst_ops->gc callback.  Rate-limited to one run per
 * ip6_rt_gc_min_interval unless the table exceeds ip6_rt_max_size.
 * The static "expire" adapts: it decays by 1/2^elasticity on every
 * call and creeps up while the table stays full.  Returns nonzero if
 * the table is still over the hard limit (allocation should fail).
 */
static int ip6_dst_gc(void)
{
	static unsigned expire = 30*HZ;		/* adaptive expiry horizon */
	static unsigned long last_gc;		/* jiffies of the last real GC run */
	unsigned long now = jiffies;

	if (time_after(last_gc + ip6_rt_gc_min_interval, now) &&
	    atomic_read(&ip6_dst_ops.entries) <= ip6_rt_max_size)
		goto out;

	expire++;
	fib6_run_gc(expire);
	last_gc = now;
	if (atomic_read(&ip6_dst_ops.entries) < ip6_dst_ops.gc_thresh)
		expire = ip6_rt_gc_timeout>>1;	/* table drained: relax */

out:
	expire -= expire>>ip6_rt_gc_elasticity;
	return (atomic_read(&ip6_dst_ops.entries) > ip6_rt_max_size);
}
759 
760 /* Clean host part of a prefix. Not necessary in radix tree,
761    but results in cleaner routing tables.
762 
763    Remove it only when all the things will work!
764  */
765 
766 static int ipv6_get_mtu(struct net_device *dev)
767 {
768 	int mtu = IPV6_MIN_MTU;
769 	struct inet6_dev *idev;
770 
771 	idev = in6_dev_get(dev);
772 	if (idev) {
773 		mtu = idev->cnf.mtu6;
774 		in6_dev_put(idev);
775 	}
776 	return mtu;
777 }
778 
779 int ipv6_get_hoplimit(struct net_device *dev)
780 {
781 	int hoplimit = ipv6_devconf.hop_limit;
782 	struct inet6_dev *idev;
783 
784 	idev = in6_dev_get(dev);
785 	if (idev) {
786 		hoplimit = idev->cnf.hop_limit;
787 		in6_dev_put(idev);
788 	}
789 	return hoplimit;
790 }
791 
792 /*
793  *
794  */
795 
/*
 * Add a route described by @rtmsg (plus optional netlink attributes in
 * @_rtattr) to the fib.  Validates the prefix lengths, resolves the
 * output device and gateway, fills in metrics, and hands the entry to
 * ip6_ins_rt() (which consumes it).  On any error path all acquired
 * references are dropped via the "out" label and -errno is returned.
 */
int ip6_route_add(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh,
		void *_rtattr, struct netlink_skb_parms *req)
{
	int err;
	struct rtmsg *r;
	struct rtattr **rta;
	struct rt6_info *rt = NULL;
	struct net_device *dev = NULL;
	struct inet6_dev *idev = NULL;
	int addr_type;

	rta = (struct rtattr **) _rtattr;

	/* Prefix lengths above /128 are nonsense. */
	if (rtmsg->rtmsg_dst_len > 128 || rtmsg->rtmsg_src_len > 128)
		return -EINVAL;
#ifndef CONFIG_IPV6_SUBTREES
	/* Source routing only works with subtree support compiled in. */
	if (rtmsg->rtmsg_src_len)
		return -EINVAL;
#endif
	if (rtmsg->rtmsg_ifindex) {
		err = -ENODEV;
		dev = dev_get_by_index(rtmsg->rtmsg_ifindex);
		if (!dev)
			goto out;
		idev = in6_dev_get(dev);
		if (!idev)
			goto out;
	}

	if (rtmsg->rtmsg_metric == 0)
		rtmsg->rtmsg_metric = IP6_RT_PRIO_USER;

	rt = ip6_dst_alloc();

	if (rt == NULL) {
		err = -ENOMEM;
		goto out;
	}

	rt->u.dst.obsolete = -1;
	rt->rt6i_expires = jiffies + clock_t_to_jiffies(rtmsg->rtmsg_info);
	if (nlh && (r = NLMSG_DATA(nlh))) {
		rt->rt6i_protocol = r->rtm_protocol;
	} else {
		rt->rt6i_protocol = RTPROT_BOOT;
	}

	addr_type = ipv6_addr_type(&rtmsg->rtmsg_dst);

	if (addr_type & IPV6_ADDR_MULTICAST)
		rt->u.dst.input = ip6_mc_input;
	else
		rt->u.dst.input = ip6_forward;

	rt->u.dst.output = ip6_output;

	ipv6_addr_prefix(&rt->rt6i_dst.addr,
			 &rtmsg->rtmsg_dst, rtmsg->rtmsg_dst_len);
	rt->rt6i_dst.plen = rtmsg->rtmsg_dst_len;
	if (rt->rt6i_dst.plen == 128)
	       rt->u.dst.flags = DST_HOST;

#ifdef CONFIG_IPV6_SUBTREES
	ipv6_addr_prefix(&rt->rt6i_src.addr,
			 &rtmsg->rtmsg_src, rtmsg->rtmsg_src_len);
	rt->rt6i_src.plen = rtmsg->rtmsg_src_len;
#endif

	rt->rt6i_metric = rtmsg->rtmsg_metric;

	/* We cannot add true routes via loopback here,
	   they would result in kernel looping; promote them to reject routes
	 */
	if ((rtmsg->rtmsg_flags&RTF_REJECT) ||
	    (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK))) {
		/* hold loopback dev/idev if we haven't done so. */
		if (dev != &loopback_dev) {
			if (dev) {
				dev_put(dev);
				in6_dev_put(idev);
			}
			dev = &loopback_dev;
			dev_hold(dev);
			idev = in6_dev_get(dev);
			if (!idev) {
				err = -ENODEV;
				goto out;
			}
		}
		rt->u.dst.output = ip6_pkt_discard_out;
		rt->u.dst.input = ip6_pkt_discard;
		rt->u.dst.error = -ENETUNREACH;
		rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
		goto install_route;
	}

	if (rtmsg->rtmsg_flags & RTF_GATEWAY) {
		struct in6_addr *gw_addr;
		int gwa_type;

		gw_addr = &rtmsg->rtmsg_gateway;
		ipv6_addr_copy(&rt->rt6i_gateway, &rtmsg->rtmsg_gateway);
		gwa_type = ipv6_addr_type(gw_addr);

		if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
			struct rt6_info *grt;

			/* IPv6 strictly inhibits using not link-local
			   addresses as nexthop address.
			   Otherwise, router will not able to send redirects.
			   It is very good, but in some (rare!) circumstances
			   (SIT, PtP, NBMA NOARP links) it is handy to allow
			   some exceptions. --ANK
			 */
			err = -EINVAL;
			if (!(gwa_type&IPV6_ADDR_UNICAST))
				goto out;

			/* The gateway must itself be reachable via a
			 * non-gatewayed route on the same device. */
			grt = rt6_lookup(gw_addr, NULL, rtmsg->rtmsg_ifindex, 1);

			err = -EHOSTUNREACH;
			if (grt == NULL)
				goto out;
			if (dev) {
				if (dev != grt->rt6i_dev) {
					dst_release(&grt->u.dst);
					goto out;
				}
			} else {
				/* Adopt the gateway route's device/idev. */
				dev = grt->rt6i_dev;
				idev = grt->rt6i_idev;
				dev_hold(dev);
				in6_dev_hold(grt->rt6i_idev);
			}
			if (!(grt->rt6i_flags&RTF_GATEWAY))
				err = 0;
			dst_release(&grt->u.dst);

			if (err)
				goto out;
		}
		err = -EINVAL;
		if (dev == NULL || (dev->flags&IFF_LOOPBACK))
			goto out;
	}

	err = -ENODEV;
	if (dev == NULL)
		goto out;

	if (rtmsg->rtmsg_flags & (RTF_GATEWAY|RTF_NONEXTHOP)) {
		/* Bind the nexthop neighbour up front. */
		rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
		if (IS_ERR(rt->rt6i_nexthop)) {
			err = PTR_ERR(rt->rt6i_nexthop);
			rt->rt6i_nexthop = NULL;
			goto out;
		}
	}

	rt->rt6i_flags = rtmsg->rtmsg_flags;

install_route:
	/* Apply user-supplied RTA_METRICS attributes, if any. */
	if (rta && rta[RTA_METRICS-1]) {
		int attrlen = RTA_PAYLOAD(rta[RTA_METRICS-1]);
		struct rtattr *attr = RTA_DATA(rta[RTA_METRICS-1]);

		while (RTA_OK(attr, attrlen)) {
			unsigned flavor = attr->rta_type;
			if (flavor) {
				if (flavor > RTAX_MAX) {
					err = -EINVAL;
					goto out;
				}
				rt->u.dst.metrics[flavor-1] =
					*(u32 *)RTA_DATA(attr);
			}
			attr = RTA_NEXT(attr, attrlen);
		}
	}

	/* Fill in defaults for any metric left unset. */
	if (rt->u.dst.metrics[RTAX_HOPLIMIT-1] == 0)
		rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
	if (!rt->u.dst.metrics[RTAX_MTU-1])
		rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(dev);
	if (!rt->u.dst.metrics[RTAX_ADVMSS-1])
		rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
	rt->u.dst.dev = dev;
	rt->rt6i_idev = idev;
	return ip6_ins_rt(rt, nlh, _rtattr, req);	/* transfers dev/idev/rt ownership */

out:
	if (dev)
		dev_put(dev);
	if (idev)
		in6_dev_put(idev);
	if (rt)
		dst_free((struct dst_entry *) rt);
	return err;
}
995 
/*
 * Remove @rt from the fib and drop the caller's reference.  Also
 * invalidates the cached default-router pointer, since @rt may be it.
 */
int ip6_del_rt(struct rt6_info *rt, struct nlmsghdr *nlh, void *_rtattr, struct netlink_skb_parms *req)
{
	int err;

	write_lock_bh(&rt6_lock);

	rt6_reset_dflt_pointer(NULL);

	err = fib6_del(rt, nlh, _rtattr, req);
	dst_release(&rt->u.dst);

	write_unlock_bh(&rt6_lock);

	return err;
}
1011 
/*
 * Delete the first fib entry matching @rtmsg: the dst/src prefixes
 * must locate a node, and ifindex, gateway and metric are matched only
 * when non-zero in the request.  Returns -ESRCH when nothing matches.
 */
static int ip6_route_del(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh, void *_rtattr, struct netlink_skb_parms *req)
{
	struct fib6_node *fn;
	struct rt6_info *rt;
	int err = -ESRCH;

	read_lock_bh(&rt6_lock);

	fn = fib6_locate(&ip6_routing_table,
			 &rtmsg->rtmsg_dst, rtmsg->rtmsg_dst_len,
			 &rtmsg->rtmsg_src, rtmsg->rtmsg_src_len);

	if (fn) {
		for (rt = fn->leaf; rt; rt = rt->u.next) {
			if (rtmsg->rtmsg_ifindex &&
			    (rt->rt6i_dev == NULL ||
			     rt->rt6i_dev->ifindex != rtmsg->rtmsg_ifindex))
				continue;
			if (rtmsg->rtmsg_flags&RTF_GATEWAY &&
			    !ipv6_addr_equal(&rtmsg->rtmsg_gateway, &rt->rt6i_gateway))
				continue;
			if (rtmsg->rtmsg_metric &&
			    rtmsg->rtmsg_metric != rt->rt6i_metric)
				continue;
			/* Hold before unlocking; ip6_del_rt drops the ref. */
			dst_hold(&rt->u.dst);
			read_unlock_bh(&rt6_lock);

			return ip6_del_rt(rt, nlh, _rtattr, req);
		}
	}
	read_unlock_bh(&rt6_lock);

	return err;
}
1046 
1047 /*
1048  *	Handle redirects
1049  */
1050 void rt6_redirect(struct in6_addr *dest, struct in6_addr *saddr,
1051 		  struct neighbour *neigh, u8 *lladdr, int on_link)
1052 {
1053 	struct rt6_info *rt, *nrt;
1054 
1055 	/* Locate old route to this destination. */
1056 	rt = rt6_lookup(dest, NULL, neigh->dev->ifindex, 1);
1057 
1058 	if (rt == NULL)
1059 		return;
1060 
1061 	if (neigh->dev != rt->rt6i_dev)
1062 		goto out;
1063 
1064 	/*
1065 	 * Current route is on-link; redirect is always invalid.
1066 	 *
1067 	 * Seems, previous statement is not true. It could
1068 	 * be node, which looks for us as on-link (f.e. proxy ndisc)
1069 	 * But then router serving it might decide, that we should
1070 	 * know truth 8)8) --ANK (980726).
1071 	 */
1072 	if (!(rt->rt6i_flags&RTF_GATEWAY))
1073 		goto out;
1074 
1075 	/*
1076 	 *	RFC 2461 specifies that redirects should only be
1077 	 *	accepted if they come from the nexthop to the target.
1078 	 *	Due to the way default routers are chosen, this notion
1079 	 *	is a bit fuzzy and one might need to check all default
1080 	 *	routers.
1081 	 */
1082 	if (!ipv6_addr_equal(saddr, &rt->rt6i_gateway)) {
1083 		if (rt->rt6i_flags & RTF_DEFAULT) {
1084 			struct rt6_info *rt1;
1085 
1086 			read_lock(&rt6_lock);
1087 			for (rt1 = ip6_routing_table.leaf; rt1; rt1 = rt1->u.next) {
1088 				if (ipv6_addr_equal(saddr, &rt1->rt6i_gateway)) {
1089 					dst_hold(&rt1->u.dst);
1090 					dst_release(&rt->u.dst);
1091 					read_unlock(&rt6_lock);
1092 					rt = rt1;
1093 					goto source_ok;
1094 				}
1095 			}
1096 			read_unlock(&rt6_lock);
1097 		}
1098 		if (net_ratelimit())
1099 			printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
1100 			       "for redirect target\n");
1101 		goto out;
1102 	}
1103 
1104 source_ok:
1105 
1106 	/*
1107 	 *	We have finally decided to accept it.
1108 	 */
1109 
1110 	neigh_update(neigh, lladdr, NUD_STALE,
1111 		     NEIGH_UPDATE_F_WEAK_OVERRIDE|
1112 		     NEIGH_UPDATE_F_OVERRIDE|
1113 		     (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1114 				     NEIGH_UPDATE_F_ISROUTER))
1115 		     );
1116 
1117 	/*
1118 	 * Redirect received -> path was valid.
1119 	 * Look, redirects are sent only in response to data packets,
1120 	 * so that this nexthop apparently is reachable. --ANK
1121 	 */
1122 	dst_confirm(&rt->u.dst);
1123 
1124 	/* Duplicate redirect: silently ignore. */
1125 	if (neigh == rt->u.dst.neighbour)
1126 		goto out;
1127 
1128 	nrt = ip6_rt_copy(rt);
1129 	if (nrt == NULL)
1130 		goto out;
1131 
1132 	nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1133 	if (on_link)
1134 		nrt->rt6i_flags &= ~RTF_GATEWAY;
1135 
1136 	ipv6_addr_copy(&nrt->rt6i_dst.addr, dest);
1137 	nrt->rt6i_dst.plen = 128;
1138 	nrt->u.dst.flags |= DST_HOST;
1139 
1140 	ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
1141 	nrt->rt6i_nexthop = neigh_clone(neigh);
1142 	/* Reset pmtu, it may be better */
1143 	nrt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(neigh->dev);
1144 	nrt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&nrt->u.dst));
1145 
1146 	if (ip6_ins_rt(nrt, NULL, NULL, NULL))
1147 		goto out;
1148 
1149 	if (rt->rt6i_flags&RTF_CACHE) {
1150 		ip6_del_rt(rt, NULL, NULL, NULL);
1151 		return;
1152 	}
1153 
1154 out:
1155         dst_release(&rt->u.dst);
1156 	return;
1157 }
1158 
1159 /*
1160  *	Handle ICMP "packet too big" messages
1161  *	i.e. Path MTU discovery
1162  */
1163 
void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
			struct net_device *dev, u32 pmtu)
{
	struct rt6_info *rt, *nrt;
	int allfrag = 0;

	/* Find the route the Packet Too Big message refers to.  rt6_lookup
	 * returns it with a reference held, dropped at "out". */
	rt = rt6_lookup(daddr, saddr, dev->ifindex, 0);
	if (rt == NULL)
		return;

	/* The reported MTU does not shrink the path MTU we already use:
	 * nothing to update. */
	if (pmtu >= dst_mtu(&rt->u.dst))
		goto out;

	if (pmtu < IPV6_MIN_MTU) {
		/*
		 * According to RFC2460, PMTU is set to the IPv6 Minimum Link
		 * MTU (1280) and a fragment header should always be included
		 * after a node receiving Too Big message reporting PMTU is
		 * less than the IPv6 Minimum Link MTU.
		 */
		pmtu = IPV6_MIN_MTU;
		allfrag = 1;
	}

	/* New mtu received -> path was valid.
	   They are sent only in response to data packets,
	   so that this nexthop apparently is reachable. --ANK
	 */
	dst_confirm(&rt->u.dst);

	/* Host route. If it is static, it would be better
	   not to override it, but add new one, so that
	   when cache entry will expire old pmtu
	   would return automatically.
	 */
	if (rt->rt6i_flags & RTF_CACHE) {
		/* Update the cached clone in place; it will age out after
		 * ip6_rt_mtu_expires and the original PMTU reappears. */
		rt->u.dst.metrics[RTAX_MTU-1] = pmtu;
		if (allfrag)
			rt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
		dst_set_expires(&rt->u.dst, ip6_rt_mtu_expires);
		rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
		goto out;
	}

	/* Network route.
	   Two cases are possible:
	   1. It is connected route. Action: COW
	   2. It is gatewayed route or NONEXTHOP route. Action: clone it.
	 */
	if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) {
		/* NOTE(review): rt6_cow is relied on to return an entry whose
		 * dst.error is set on failure rather than NULL — confirm. */
		nrt = rt6_cow(rt, daddr, saddr, NULL);
		if (!nrt->u.dst.error) {
			nrt->u.dst.metrics[RTAX_MTU-1] = pmtu;
			if (allfrag)
				nrt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
			/* According to RFC 1981, detecting PMTU increase shouldn't be
			   happened within 5 mins, the recommended timer is 10 mins.
			   Here this route expiration time is set to ip6_rt_mtu_expires
			   which is 10 mins. After 10 mins the decreased pmtu is expired
			   and detecting PMTU increase will be automatically happened.
			 */
			dst_set_expires(&nrt->u.dst, ip6_rt_mtu_expires);
			nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
		}
		dst_release(&nrt->u.dst);
	} else {
		nrt = ip6_rt_copy(rt);
		if (nrt == NULL)
			goto out;
		/* Turn the clone into a /128 cache entry for this dest. */
		ipv6_addr_copy(&nrt->rt6i_dst.addr, daddr);
		nrt->rt6i_dst.plen = 128;
		nrt->u.dst.flags |= DST_HOST;
		nrt->rt6i_nexthop = neigh_clone(rt->rt6i_nexthop);
		dst_set_expires(&nrt->u.dst, ip6_rt_mtu_expires);
		nrt->rt6i_flags |= RTF_DYNAMIC|RTF_CACHE|RTF_EXPIRES;
		nrt->u.dst.metrics[RTAX_MTU-1] = pmtu;
		if (allfrag)
			nrt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
		ip6_ins_rt(nrt, NULL, NULL, NULL);
	}

out:
	dst_release(&rt->u.dst);
}
1248 
1249 /*
1250  *	Misc support functions
1251  */
1252 
1253 static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
1254 {
1255 	struct rt6_info *rt = ip6_dst_alloc();
1256 
1257 	if (rt) {
1258 		rt->u.dst.input = ort->u.dst.input;
1259 		rt->u.dst.output = ort->u.dst.output;
1260 
1261 		memcpy(rt->u.dst.metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
1262 		rt->u.dst.dev = ort->u.dst.dev;
1263 		if (rt->u.dst.dev)
1264 			dev_hold(rt->u.dst.dev);
1265 		rt->rt6i_idev = ort->rt6i_idev;
1266 		if (rt->rt6i_idev)
1267 			in6_dev_hold(rt->rt6i_idev);
1268 		rt->u.dst.lastuse = jiffies;
1269 		rt->rt6i_expires = 0;
1270 
1271 		ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
1272 		rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
1273 		rt->rt6i_metric = 0;
1274 
1275 		memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1276 #ifdef CONFIG_IPV6_SUBTREES
1277 		memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1278 #endif
1279 	}
1280 	return rt;
1281 }
1282 
1283 struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *dev)
1284 {
1285 	struct rt6_info *rt;
1286 	struct fib6_node *fn;
1287 
1288 	fn = &ip6_routing_table;
1289 
1290 	write_lock_bh(&rt6_lock);
1291 	for (rt = fn->leaf; rt; rt=rt->u.next) {
1292 		if (dev == rt->rt6i_dev &&
1293 		    ipv6_addr_equal(&rt->rt6i_gateway, addr))
1294 			break;
1295 	}
1296 	if (rt)
1297 		dst_hold(&rt->u.dst);
1298 	write_unlock_bh(&rt6_lock);
1299 	return rt;
1300 }
1301 
1302 struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr,
1303 				     struct net_device *dev)
1304 {
1305 	struct in6_rtmsg rtmsg;
1306 
1307 	memset(&rtmsg, 0, sizeof(struct in6_rtmsg));
1308 	rtmsg.rtmsg_type = RTMSG_NEWROUTE;
1309 	ipv6_addr_copy(&rtmsg.rtmsg_gateway, gwaddr);
1310 	rtmsg.rtmsg_metric = 1024;
1311 	rtmsg.rtmsg_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT | RTF_UP | RTF_EXPIRES;
1312 
1313 	rtmsg.rtmsg_ifindex = dev->ifindex;
1314 
1315 	ip6_route_add(&rtmsg, NULL, NULL, NULL);
1316 	return rt6_get_dflt_router(gwaddr, dev);
1317 }
1318 
/*
 *	Remove every autoconfigured default router from the table.
 *	The read lock must be dropped before calling ip6_del_rt(), so
 *	after each deletion the list walk restarts from the top.
 */
void rt6_purge_dflt_routers(void)
{
	struct rt6_info *rt;

restart:
	read_lock_bh(&rt6_lock);
	for (rt = ip6_routing_table.leaf; rt; rt = rt->u.next) {
		if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
			/* Hold a reference so the entry survives unlocking. */
			dst_hold(&rt->u.dst);

			rt6_reset_dflt_pointer(NULL);

			read_unlock_bh(&rt6_lock);

			ip6_del_rt(rt, NULL, NULL, NULL);

			goto restart;
		}
	}
	read_unlock_bh(&rt6_lock);
}
1340 
1341 int ipv6_route_ioctl(unsigned int cmd, void __user *arg)
1342 {
1343 	struct in6_rtmsg rtmsg;
1344 	int err;
1345 
1346 	switch(cmd) {
1347 	case SIOCADDRT:		/* Add a route */
1348 	case SIOCDELRT:		/* Delete a route */
1349 		if (!capable(CAP_NET_ADMIN))
1350 			return -EPERM;
1351 		err = copy_from_user(&rtmsg, arg,
1352 				     sizeof(struct in6_rtmsg));
1353 		if (err)
1354 			return -EFAULT;
1355 
1356 		rtnl_lock();
1357 		switch (cmd) {
1358 		case SIOCADDRT:
1359 			err = ip6_route_add(&rtmsg, NULL, NULL, NULL);
1360 			break;
1361 		case SIOCDELRT:
1362 			err = ip6_route_del(&rtmsg, NULL, NULL, NULL);
1363 			break;
1364 		default:
1365 			err = -EINVAL;
1366 		}
1367 		rtnl_unlock();
1368 
1369 		return err;
1370 	};
1371 
1372 	return -EINVAL;
1373 }
1374 
1375 /*
1376  *	Drop the packet on the floor
1377  */
1378 
/* Count the packet as "no route", report destination-unreachable to
 * the sender, then free the skb. */
static int ip6_pkt_discard(struct sk_buff *skb)
{
	IP6_INC_STATS(IPSTATS_MIB_OUTNOROUTES);
	icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_NOROUTE, 0, skb->dev);
	kfree_skb(skb);
	return 0;
}
1386 
/* Output-path variant: point skb->dev at the route's device before
 * discarding so the ICMPv6 error is generated on the right interface. */
static int ip6_pkt_discard_out(struct sk_buff *skb)
{
	skb->dev = skb->dst->dev;
	return ip6_pkt_discard(skb);
}
1392 
1393 /*
1394  *	Allocate a dst for local (unicast / anycast) address.
1395  */
1396 
struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
				    const struct in6_addr *addr,
				    int anycast)
{
	struct rt6_info *rt = ip6_dst_alloc();

	if (rt == NULL)
		return ERR_PTR(-ENOMEM);

	/* References for rt6i_dev/rt6i_idev set below; presumably released
	 * by the dst destructor once the route dies — NOTE(review): confirm
	 * against ip6_dst_destroy(). */
	dev_hold(&loopback_dev);
	in6_dev_hold(idev);

	/* Local /128 route: packets are delivered through loopback. */
	rt->u.dst.flags = DST_HOST;
	rt->u.dst.input = ip6_input;
	rt->u.dst.output = ip6_output;
	rt->rt6i_dev = &loopback_dev;
	rt->rt6i_idev = idev;
	rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
	rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
	rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
	rt->u.dst.obsolete = -1;

	rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
	if (anycast)
		rt->rt6i_flags |= RTF_ANYCAST;
	else
		rt->rt6i_flags |= RTF_LOCAL;
	rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
	if (rt->rt6i_nexthop == NULL) {
		/* dst_free tears down the partially built entry. */
		dst_free((struct dst_entry *) rt);
		return ERR_PTR(-ENOMEM);
	}

	ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
	rt->rt6i_dst.plen = 128;

	/* Hand the caller one reference on the new entry. */
	atomic_set(&rt->u.dst.__refcnt, 1);

	return rt;
}
1437 
1438 static int fib6_ifdown(struct rt6_info *rt, void *arg)
1439 {
1440 	if (((void*)rt->rt6i_dev == arg || arg == NULL) &&
1441 	    rt != &ip6_null_entry) {
1442 		RT6_TRACE("deleted by ifdown %p\n", rt);
1443 		return -1;
1444 	}
1445 	return 0;
1446 }
1447 
/* Purge all routes using @dev (or every route when dev is NULL, as done
 * from ip6_route_cleanup). */
void rt6_ifdown(struct net_device *dev)
{
	write_lock_bh(&rt6_lock);
	fib6_clean_tree(&ip6_routing_table, fib6_ifdown, 0, dev);
	write_unlock_bh(&rt6_lock);
}
1454 
/* Argument bundle passed from rt6_mtu_change() to its per-route
 * callback rt6_mtu_change_route(). */
struct rt6_mtu_change_arg
{
	struct net_device *dev;	/* device whose MTU changed */
	unsigned mtu;		/* its new MTU */
};
1460 
/* Per-route callback for rt6_mtu_change(): propagate a device MTU
 * change into the route's RTAX_MTU/RTAX_ADVMSS metrics. */
static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
{
	struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
	struct inet6_dev *idev;

	/* In IPv6 pmtu discovery is not optional,
	   so that RTAX_MTU lock cannot disable it.
	   We still use this lock to block changes
	   caused by addrconf/ndisc.
	*/

	idev = __in6_dev_get(arg->dev);
	if (idev == NULL)
		return 0;

	/* For administrative MTU increase, there is no way to discover
	   IPv6 PMTU increase, so PMTU increase should be updated here.
	   Since RFC 1981 doesn't include administrative MTU increase
	   update PMTU increase is a MUST. (i.e. jumbo frame)
	 */
	/*
	   If new MTU is less than route PMTU, this new MTU will be the
	   lowest MTU in the path, update the route PMTU to reflect PMTU
	   decreases; if new MTU is greater than route PMTU, and the
	   old MTU is the lowest MTU in the path, update the route PMTU
	   to reflect the increase. In this case if the other nodes' MTU
	   also have the lowest MTU, a TOO BIG MESSAGE will lead to
	   PMTU discovery.
	 */
	if (rt->rt6i_dev == arg->dev &&
	    !dst_metric_locked(&rt->u.dst, RTAX_MTU) &&
            (dst_mtu(&rt->u.dst) > arg->mtu ||
             (dst_mtu(&rt->u.dst) < arg->mtu &&
	      dst_mtu(&rt->u.dst) == idev->cnf.mtu6)))
		rt->u.dst.metrics[RTAX_MTU-1] = arg->mtu;
	/* The advertised MSS always follows the new device MTU. */
	rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(arg->mtu);
	return 0;
}
1499 
1500 void rt6_mtu_change(struct net_device *dev, unsigned mtu)
1501 {
1502 	struct rt6_mtu_change_arg arg;
1503 
1504 	arg.dev = dev;
1505 	arg.mtu = mtu;
1506 	read_lock_bh(&rt6_lock);
1507 	fib6_clean_tree(&ip6_routing_table, rt6_mtu_change_route, 0, &arg);
1508 	read_unlock_bh(&rt6_lock);
1509 }
1510 
1511 static int inet6_rtm_to_rtmsg(struct rtmsg *r, struct rtattr **rta,
1512 			      struct in6_rtmsg *rtmsg)
1513 {
1514 	memset(rtmsg, 0, sizeof(*rtmsg));
1515 
1516 	rtmsg->rtmsg_dst_len = r->rtm_dst_len;
1517 	rtmsg->rtmsg_src_len = r->rtm_src_len;
1518 	rtmsg->rtmsg_flags = RTF_UP;
1519 	if (r->rtm_type == RTN_UNREACHABLE)
1520 		rtmsg->rtmsg_flags |= RTF_REJECT;
1521 
1522 	if (rta[RTA_GATEWAY-1]) {
1523 		if (rta[RTA_GATEWAY-1]->rta_len != RTA_LENGTH(16))
1524 			return -EINVAL;
1525 		memcpy(&rtmsg->rtmsg_gateway, RTA_DATA(rta[RTA_GATEWAY-1]), 16);
1526 		rtmsg->rtmsg_flags |= RTF_GATEWAY;
1527 	}
1528 	if (rta[RTA_DST-1]) {
1529 		if (RTA_PAYLOAD(rta[RTA_DST-1]) < ((r->rtm_dst_len+7)>>3))
1530 			return -EINVAL;
1531 		memcpy(&rtmsg->rtmsg_dst, RTA_DATA(rta[RTA_DST-1]), ((r->rtm_dst_len+7)>>3));
1532 	}
1533 	if (rta[RTA_SRC-1]) {
1534 		if (RTA_PAYLOAD(rta[RTA_SRC-1]) < ((r->rtm_src_len+7)>>3))
1535 			return -EINVAL;
1536 		memcpy(&rtmsg->rtmsg_src, RTA_DATA(rta[RTA_SRC-1]), ((r->rtm_src_len+7)>>3));
1537 	}
1538 	if (rta[RTA_OIF-1]) {
1539 		if (rta[RTA_OIF-1]->rta_len != RTA_LENGTH(sizeof(int)))
1540 			return -EINVAL;
1541 		memcpy(&rtmsg->rtmsg_ifindex, RTA_DATA(rta[RTA_OIF-1]), sizeof(int));
1542 	}
1543 	if (rta[RTA_PRIORITY-1]) {
1544 		if (rta[RTA_PRIORITY-1]->rta_len != RTA_LENGTH(4))
1545 			return -EINVAL;
1546 		memcpy(&rtmsg->rtmsg_metric, RTA_DATA(rta[RTA_PRIORITY-1]), 4);
1547 	}
1548 	return 0;
1549 }
1550 
1551 int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1552 {
1553 	struct rtmsg *r = NLMSG_DATA(nlh);
1554 	struct in6_rtmsg rtmsg;
1555 
1556 	if (inet6_rtm_to_rtmsg(r, arg, &rtmsg))
1557 		return -EINVAL;
1558 	return ip6_route_del(&rtmsg, nlh, arg, &NETLINK_CB(skb));
1559 }
1560 
1561 int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1562 {
1563 	struct rtmsg *r = NLMSG_DATA(nlh);
1564 	struct in6_rtmsg rtmsg;
1565 
1566 	if (inet6_rtm_to_rtmsg(r, arg, &rtmsg))
1567 		return -EINVAL;
1568 	return ip6_route_add(&rtmsg, nlh, arg, &NETLINK_CB(skb));
1569 }
1570 
/* State handed from inet6_dump_fib() down to rt6_dump_route() for each
 * route visited during a netlink dump. */
struct rt6_rtnl_dump_arg
{
	struct sk_buff *skb;		/* skb being filled with RTM messages */
	struct netlink_callback *cb;	/* dump request context */
};
1576 
/*
 *	Fill one RTM_* netlink message describing @rt into @skb.
 *	@dst/@src: when non-NULL (route queries) they are reported as
 *	/128 keys instead of the route's own prefix.
 *	@prefix: caller wants RTF_PREFIX_RT routes only; others are
 *	skipped with return value 1.
 *	Returns skb->len on success, 1 when the route was skipped, -1
 *	when the skb overflowed (the partial message is trimmed away).
 */
static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt,
			 struct in6_addr *dst, struct in6_addr *src,
			 int iif, int type, u32 pid, u32 seq,
			 int prefix, unsigned int flags)
{
	struct rtmsg *rtm;
	struct nlmsghdr  *nlh;
	unsigned char	 *b = skb->tail;	/* rollback point on overflow */
	struct rta_cacheinfo ci;

	if (prefix) {	/* user wants prefix routes only */
		if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
			/* success since this is not a prefix route */
			return 1;
		}
	}

	nlh = NLMSG_NEW(skb, pid, seq, type, sizeof(*rtm), flags);
	rtm = NLMSG_DATA(nlh);
	rtm->rtm_family = AF_INET6;
	rtm->rtm_dst_len = rt->rt6i_dst.plen;
	rtm->rtm_src_len = rt->rt6i_src.plen;
	rtm->rtm_tos = 0;
	rtm->rtm_table = RT_TABLE_MAIN;
	if (rt->rt6i_flags&RTF_REJECT)
		rtm->rtm_type = RTN_UNREACHABLE;
	else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
		rtm->rtm_type = RTN_LOCAL;
	else
		rtm->rtm_type = RTN_UNICAST;
	rtm->rtm_flags = 0;
	rtm->rtm_scope = RT_SCOPE_UNIVERSE;
	rtm->rtm_protocol = rt->rt6i_protocol;
	/* Derive the advertised route origin from the rt6i flags. */
	if (rt->rt6i_flags&RTF_DYNAMIC)
		rtm->rtm_protocol = RTPROT_REDIRECT;
	else if (rt->rt6i_flags & RTF_ADDRCONF)
		rtm->rtm_protocol = RTPROT_KERNEL;
	else if (rt->rt6i_flags&RTF_DEFAULT)
		rtm->rtm_protocol = RTPROT_RA;

	if (rt->rt6i_flags&RTF_CACHE)
		rtm->rtm_flags |= RTM_F_CLONED;

	if (dst) {
		RTA_PUT(skb, RTA_DST, 16, dst);
	        rtm->rtm_dst_len = 128;
	} else if (rtm->rtm_dst_len)
		RTA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
#ifdef CONFIG_IPV6_SUBTREES
	if (src) {
		RTA_PUT(skb, RTA_SRC, 16, src);
	        rtm->rtm_src_len = 128;
	} else if (rtm->rtm_src_len)
		RTA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
#endif
	if (iif)
		RTA_PUT(skb, RTA_IIF, 4, &iif);
	else if (dst) {
		/* For output-route queries also report a preferred source. */
		struct in6_addr saddr_buf;
		if (ipv6_get_saddr(&rt->u.dst, dst, &saddr_buf) == 0)
			RTA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
	}
	if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0)
		goto rtattr_failure;
	if (rt->u.dst.neighbour)
		RTA_PUT(skb, RTA_GATEWAY, 16, &rt->u.dst.neighbour->primary_key);
	if (rt->u.dst.dev)
		RTA_PUT(skb, RTA_OIF, sizeof(int), &rt->rt6i_dev->ifindex);
	RTA_PUT(skb, RTA_PRIORITY, 4, &rt->rt6i_metric);
	/* Cache info: ages and expiry are reported in clock_t units. */
	ci.rta_lastuse = jiffies_to_clock_t(jiffies - rt->u.dst.lastuse);
	if (rt->rt6i_expires)
		ci.rta_expires = jiffies_to_clock_t(rt->rt6i_expires - jiffies);
	else
		ci.rta_expires = 0;
	ci.rta_used = rt->u.dst.__use;
	ci.rta_clntref = atomic_read(&rt->u.dst.__refcnt);
	ci.rta_error = rt->u.dst.error;
	ci.rta_id = 0;
	ci.rta_ts = 0;
	ci.rta_tsage = 0;
	RTA_PUT(skb, RTA_CACHEINFO, sizeof(ci), &ci);
	nlh->nlmsg_len = skb->tail - b;
	return skb->len;

nlmsg_failure:
rtattr_failure:
	/* skb space exhausted: drop the partial message. */
	skb_trim(skb, b - skb->data);
	return -1;
}
1666 
1667 static int rt6_dump_route(struct rt6_info *rt, void *p_arg)
1668 {
1669 	struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
1670 	int prefix;
1671 
1672 	if (arg->cb->nlh->nlmsg_len >= NLMSG_LENGTH(sizeof(struct rtmsg))) {
1673 		struct rtmsg *rtm = NLMSG_DATA(arg->cb->nlh);
1674 		prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
1675 	} else
1676 		prefix = 0;
1677 
1678 	return rt6_fill_node(arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
1679 		     NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
1680 		     prefix, NLM_F_MULTI);
1681 }
1682 
/*
 *	Walker callback: dump every route hanging off the current fib6
 *	leaf.  A negative result from rt6_dump_route() means the skb is
 *	full; record where we stopped in w->leaf and return 1 so the
 *	walk is suspended and can be resumed by the next dump call.
 */
static int fib6_dump_node(struct fib6_walker_t *w)
{
	int res;
	struct rt6_info *rt;

	for (rt = w->leaf; rt; rt = rt->u.next) {
		res = rt6_dump_route(rt, w->args);
		if (res < 0) {
			/* Frame is full, suspend walking */
			w->leaf = rt;
			return 1;
		}
		BUG_TRAP(res!=0);
	}
	w->leaf = NULL;
	return 0;
}
1700 
/*
 *	Tear down the dump state stashed in the netlink callback:
 *	unlink and free the walker kept in cb->args[0], and restore the
 *	original ->done handler that inet6_dump_fib() saved in
 *	cb->args[1].
 */
static void fib6_dump_end(struct netlink_callback *cb)
{
	struct fib6_walker_t *w = (void*)cb->args[0];

	if (w) {
		cb->args[0] = 0;
		fib6_walker_unlink(w);
		kfree(w);
	}
	cb->done = (void*)cb->args[1];
	cb->args[1] = 0;
}
1713 
/* Netlink ->done hook: release the walker, then chain to the saved
 * original ->done callback, if there was one. */
static int fib6_dump_done(struct netlink_callback *cb)
{
	fib6_dump_end(cb);
	return cb->done ? cb->done(cb) : 0;
}
1719 
/*
 *	RTM_GETROUTE dump handler.  The first call allocates a fib6
 *	walker and starts the tree walk; subsequent calls resume it
 *	where it was suspended.  The walker pointer and the caller's
 *	original ->done hook are parked in cb->args[0]/args[1].
 */
int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct rt6_rtnl_dump_arg arg;
	struct fib6_walker_t *w;
	int res;

	arg.skb = skb;
	arg.cb = cb;

	w = (void*)cb->args[0];
	if (w == NULL) {
		/* New dump:
		 *
		 * 1. hook callback destructor.
		 */
		cb->args[1] = (long)cb->done;
		cb->done = fib6_dump_done;

		/*
		 * 2. allocate and initialize walker.
		 */
		w = kmalloc(sizeof(*w), GFP_ATOMIC);
		if (w == NULL)
			return -ENOMEM;
		RT6_TRACE("dump<%p", w);
		memset(w, 0, sizeof(*w));
		w->root = &ip6_routing_table;
		w->func = fib6_dump_node;
		w->args = &arg;
		cb->args[0] = (long)w;
		read_lock_bh(&rt6_lock);
		res = fib6_walk(w);
		read_unlock_bh(&rt6_lock);
	} else {
		/* Resumed dump: refresh the arg pointer (it lives on this
		 * call's stack) and continue where we were suspended. */
		w->args = &arg;
		read_lock_bh(&rt6_lock);
		res = fib6_walk_continue(w);
		read_unlock_bh(&rt6_lock);
	}
#if RT6_DEBUG >= 3
	if (res <= 0 && skb->len == 0)
		RT6_TRACE("%p>dump end\n", w);
#endif
	res = res < 0 ? res : skb->len;
	/* res < 0 is an error. (really, impossible)
	   res == 0 means that dump is complete, but skb still can contain data.
	   res > 0 dump is not complete, but frame is full.
	 */
	/* Destroy walker, if dump of this table is complete. */
	if (res <= 0)
		fib6_dump_end(cb);
	return res;
}
1773 
/*
 *	RTM_GETROUTE (single route query) handler: resolve the flow
 *	described by the request attributes and unicast one
 *	RTM_NEWROUTE reply back to the requester.
 */
int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
{
	struct rtattr **rta = arg;
	int iif = 0;
	int err = -ENOBUFS;
	struct sk_buff *skb;
	struct flowi fl;
	struct rt6_info *rt;

	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
	if (skb == NULL)
		goto out;

	/* Reserve room for dummy headers, this skb can pass
	   through good chunk of routing engine.
	 */
	skb->mac.raw = skb->data;
	skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));

	memset(&fl, 0, sizeof(fl));
	if (rta[RTA_SRC-1])
		ipv6_addr_copy(&fl.fl6_src,
			       (struct in6_addr*)RTA_DATA(rta[RTA_SRC-1]));
	if (rta[RTA_DST-1])
		ipv6_addr_copy(&fl.fl6_dst,
			       (struct in6_addr*)RTA_DATA(rta[RTA_DST-1]));

	if (rta[RTA_IIF-1])
		memcpy(&iif, RTA_DATA(rta[RTA_IIF-1]), sizeof(int));

	if (iif) {
		/* Only validates that the interface exists; the lookup
		 * itself is keyed by the flow below. */
		struct net_device *dev;
		dev = __dev_get_by_index(iif);
		if (!dev) {
			err = -ENODEV;
			goto out_free;
		}
	}

	fl.oif = 0;
	if (rta[RTA_OIF-1])
		memcpy(&fl.oif, RTA_DATA(rta[RTA_OIF-1]), sizeof(int));

	rt = (struct rt6_info*)ip6_route_output(NULL, &fl);

	/* The skb takes over the route reference; kfree_skb releases it. */
	skb->dst = &rt->u.dst;

	NETLINK_CB(skb).dst_pid = NETLINK_CB(in_skb).pid;
	err = rt6_fill_node(skb, rt,
			    &fl.fl6_dst, &fl.fl6_src,
			    iif,
			    RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
			    nlh->nlmsg_seq, 0, 0);
	if (err < 0) {
		err = -EMSGSIZE;
		goto out_free;
	}

	err = netlink_unicast(rtnl, skb, NETLINK_CB(in_skb).pid, MSG_DONTWAIT);
	if (err > 0)
		err = 0;
out:
	return err;
out_free:
	kfree_skb(skb);
	goto out;
}
1841 
1842 void inet6_rt_notify(int event, struct rt6_info *rt, struct nlmsghdr *nlh,
1843 			struct netlink_skb_parms *req)
1844 {
1845 	struct sk_buff *skb;
1846 	int size = NLMSG_SPACE(sizeof(struct rtmsg)+256);
1847 	u32 pid = current->pid;
1848 	u32 seq = 0;
1849 
1850 	if (req)
1851 		pid = req->pid;
1852 	if (nlh)
1853 		seq = nlh->nlmsg_seq;
1854 
1855 	skb = alloc_skb(size, gfp_any());
1856 	if (!skb) {
1857 		netlink_set_err(rtnl, 0, RTNLGRP_IPV6_ROUTE, ENOBUFS);
1858 		return;
1859 	}
1860 	if (rt6_fill_node(skb, rt, NULL, NULL, 0, event, pid, seq, 0, 0) < 0) {
1861 		kfree_skb(skb);
1862 		netlink_set_err(rtnl, 0, RTNLGRP_IPV6_ROUTE, EINVAL);
1863 		return;
1864 	}
1865 	NETLINK_CB(skb).dst_group = RTNLGRP_IPV6_ROUTE;
1866 	netlink_broadcast(rtnl, skb, 0, RTNLGRP_IPV6_ROUTE, gfp_any());
1867 }
1868 
1869 /*
1870  *	/proc
1871  */
1872 
1873 #ifdef CONFIG_PROC_FS
1874 
/* Fixed width of one /proc/net/ipv6_route record as emitted by
 * rt6_info_route(): dst(32+4) + src(32+4) + nexthop(32) + flags/refs
 * fields + device name + newline.  NOTE(review): the offset arithmetic
 * in rt6_proc_info() depends on every record being exactly this long. */
#define RT6_INFO_LEN (32 + 4 + 32 + 4 + 32 + 40 + 5 + 1)
1876 
/* Cursor state threaded through rt6_info_route() while filling the
 * legacy /proc/net/ipv6_route read buffer. */
struct rt6_proc_arg
{
	char *buffer;	/* output buffer supplied by procfs */
	int offset;	/* byte offset requested by the reader */
	int length;	/* size of the output buffer */
	int skip;	/* whole records skipped so far */
	int len;	/* bytes produced so far */
};
1885 
/*
 *	Emit one fixed-width /proc/net/ipv6_route record for @rt:
 *	"dst plen src plen nexthop metric refcnt use flags devname".
 *	Records before the requested offset are skipped whole; output
 *	stops silently once the buffer is full.
 */
static int rt6_info_route(struct rt6_info *rt, void *p_arg)
{
	struct rt6_proc_arg *arg = (struct rt6_proc_arg *) p_arg;
	int i;

	/* Skip records that precede the reader's offset. */
	if (arg->skip < arg->offset / RT6_INFO_LEN) {
		arg->skip++;
		return 0;
	}

	/* Buffer full: ignore the rest of the walk. */
	if (arg->len >= arg->length)
		return 0;

	for (i=0; i<16; i++) {
		sprintf(arg->buffer + arg->len, "%02x",
			rt->rt6i_dst.addr.s6_addr[i]);
		arg->len += 2;
	}
	arg->len += sprintf(arg->buffer + arg->len, " %02x ",
			    rt->rt6i_dst.plen);

#ifdef CONFIG_IPV6_SUBTREES
	for (i=0; i<16; i++) {
		sprintf(arg->buffer + arg->len, "%02x",
			rt->rt6i_src.addr.s6_addr[i]);
		arg->len += 2;
	}
	arg->len += sprintf(arg->buffer + arg->len, " %02x ",
			    rt->rt6i_src.plen);
#else
	/* No source routing: emit an all-zero src key placeholder. */
	sprintf(arg->buffer + arg->len,
		"00000000000000000000000000000000 00 ");
	arg->len += 36;
#endif

	if (rt->rt6i_nexthop) {
		for (i=0; i<16; i++) {
			sprintf(arg->buffer + arg->len, "%02x",
				rt->rt6i_nexthop->primary_key[i]);
			arg->len += 2;
		}
	} else {
		sprintf(arg->buffer + arg->len,
			"00000000000000000000000000000000");
		arg->len += 32;
	}
	arg->len += sprintf(arg->buffer + arg->len,
			    " %08x %08x %08x %08x %8s\n",
			    rt->rt6i_metric, atomic_read(&rt->u.dst.__refcnt),
			    rt->u.dst.__use, rt->rt6i_flags,
			    rt->rt6i_dev ? rt->rt6i_dev->name : "");
	return 0;
}
1939 
/*
 *	Legacy procfs read handler for /proc/net/ipv6_route.  Relies on
 *	rt6_info_route() producing exactly RT6_INFO_LEN bytes per record
 *	so @offset can be translated into records to skip plus a byte
 *	remainder within the first emitted record.
 */
static int rt6_proc_info(char *buffer, char **start, off_t offset, int length)
{
	struct rt6_proc_arg arg;
	arg.buffer = buffer;
	arg.offset = offset;
	arg.length = length;
	arg.skip = 0;
	arg.len = 0;

	read_lock_bh(&rt6_lock);
	fib6_clean_tree(&ip6_routing_table, rt6_info_route, 0, &arg);
	read_unlock_bh(&rt6_lock);

	*start = buffer;
	if (offset)
		/* Skip the partial record preceding the requested offset. */
		*start += offset % RT6_INFO_LEN;

	arg.len -= offset % RT6_INFO_LEN;

	/* Clamp to the caller's buffer; negative means nothing usable. */
	if (arg.len > length)
		arg.len = length;
	if (arg.len < 0)
		arg.len = 0;

	return arg.len;
}
1966 
/* seq_file show handler for /proc/net/rt6_stats: one line of FIB and
 * dst-cache counters in hex. */
static int rt6_stats_seq_show(struct seq_file *seq, void *v)
{
	seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
		      rt6_stats.fib_nodes, rt6_stats.fib_route_nodes,
		      rt6_stats.fib_rt_alloc, rt6_stats.fib_rt_entries,
		      rt6_stats.fib_rt_cache,
		      atomic_read(&ip6_dst_ops.entries),
		      rt6_stats.fib_discarded_routes);

	return 0;
}
1978 
/* open() hook: single-shot seq_file, whole output produced by
 * rt6_stats_seq_show(). */
static int rt6_stats_seq_open(struct inode *inode, struct file *file)
{
	return single_open(file, rt6_stats_seq_show, NULL);
}
1983 
/* File operations for the read-only /proc/net/rt6_stats seq file. */
static struct file_operations rt6_stats_seq_fops = {
	.owner	 = THIS_MODULE,
	.open	 = rt6_stats_seq_open,
	.read	 = seq_read,
	.llseek	 = seq_lseek,
	.release = single_release,
};
1991 #endif	/* CONFIG_PROC_FS */
1992 
1993 #ifdef CONFIG_SYSCTL
1994 
/* Value written through the net.ipv6.route.flush sysctl: <= 0 expires
 * everything (gc timeout ~0UL), > 0 is used as the gc timeout. */
static int flush_delay;
1996 
1997 static
1998 int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write, struct file * filp,
1999 			      void __user *buffer, size_t *lenp, loff_t *ppos)
2000 {
2001 	if (write) {
2002 		proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
2003 		fib6_run_gc(flush_delay <= 0 ? ~0UL : (unsigned long)flush_delay);
2004 		return 0;
2005 	} else
2006 		return -EINVAL;
2007 }
2008 
2009 ctl_table ipv6_route_table[] = {
2010         {
2011 		.ctl_name	=	NET_IPV6_ROUTE_FLUSH,
2012 		.procname	=	"flush",
2013          	.data		=	&flush_delay,
2014 		.maxlen		=	sizeof(int),
2015 		.mode		=	0200,
2016          	.proc_handler	=	&ipv6_sysctl_rtcache_flush
2017 	},
2018 	{
2019 		.ctl_name	=	NET_IPV6_ROUTE_GC_THRESH,
2020 		.procname	=	"gc_thresh",
2021          	.data		=	&ip6_dst_ops.gc_thresh,
2022 		.maxlen		=	sizeof(int),
2023 		.mode		=	0644,
2024          	.proc_handler	=	&proc_dointvec,
2025 	},
2026 	{
2027 		.ctl_name	=	NET_IPV6_ROUTE_MAX_SIZE,
2028 		.procname	=	"max_size",
2029          	.data		=	&ip6_rt_max_size,
2030 		.maxlen		=	sizeof(int),
2031 		.mode		=	0644,
2032          	.proc_handler	=	&proc_dointvec,
2033 	},
2034 	{
2035 		.ctl_name	=	NET_IPV6_ROUTE_GC_MIN_INTERVAL,
2036 		.procname	=	"gc_min_interval",
2037          	.data		=	&ip6_rt_gc_min_interval,
2038 		.maxlen		=	sizeof(int),
2039 		.mode		=	0644,
2040          	.proc_handler	=	&proc_dointvec_jiffies,
2041 		.strategy	=	&sysctl_jiffies,
2042 	},
2043 	{
2044 		.ctl_name	=	NET_IPV6_ROUTE_GC_TIMEOUT,
2045 		.procname	=	"gc_timeout",
2046          	.data		=	&ip6_rt_gc_timeout,
2047 		.maxlen		=	sizeof(int),
2048 		.mode		=	0644,
2049          	.proc_handler	=	&proc_dointvec_jiffies,
2050 		.strategy	=	&sysctl_jiffies,
2051 	},
2052 	{
2053 		.ctl_name	=	NET_IPV6_ROUTE_GC_INTERVAL,
2054 		.procname	=	"gc_interval",
2055          	.data		=	&ip6_rt_gc_interval,
2056 		.maxlen		=	sizeof(int),
2057 		.mode		=	0644,
2058          	.proc_handler	=	&proc_dointvec_jiffies,
2059 		.strategy	=	&sysctl_jiffies,
2060 	},
2061 	{
2062 		.ctl_name	=	NET_IPV6_ROUTE_GC_ELASTICITY,
2063 		.procname	=	"gc_elasticity",
2064          	.data		=	&ip6_rt_gc_elasticity,
2065 		.maxlen		=	sizeof(int),
2066 		.mode		=	0644,
2067          	.proc_handler	=	&proc_dointvec_jiffies,
2068 		.strategy	=	&sysctl_jiffies,
2069 	},
2070 	{
2071 		.ctl_name	=	NET_IPV6_ROUTE_MTU_EXPIRES,
2072 		.procname	=	"mtu_expires",
2073          	.data		=	&ip6_rt_mtu_expires,
2074 		.maxlen		=	sizeof(int),
2075 		.mode		=	0644,
2076          	.proc_handler	=	&proc_dointvec_jiffies,
2077 		.strategy	=	&sysctl_jiffies,
2078 	},
2079 	{
2080 		.ctl_name	=	NET_IPV6_ROUTE_MIN_ADVMSS,
2081 		.procname	=	"min_adv_mss",
2082          	.data		=	&ip6_rt_min_advmss,
2083 		.maxlen		=	sizeof(int),
2084 		.mode		=	0644,
2085          	.proc_handler	=	&proc_dointvec_jiffies,
2086 		.strategy	=	&sysctl_jiffies,
2087 	},
2088 	{
2089 		.ctl_name	=	NET_IPV6_ROUTE_GC_MIN_INTERVAL_MS,
2090 		.procname	=	"gc_min_interval_ms",
2091          	.data		=	&ip6_rt_gc_min_interval,
2092 		.maxlen		=	sizeof(int),
2093 		.mode		=	0644,
2094          	.proc_handler	=	&proc_dointvec_ms_jiffies,
2095 		.strategy	=	&sysctl_ms_jiffies,
2096 	},
2097 	{ .ctl_name = 0 }
2098 };
2099 
2100 #endif
2101 
/* Boot-time init: create the rt6_info slab cache (panics on failure,
 * routing cannot work without it), initialize the FIB, and register
 * the /proc entries and IPv6 xfrm hooks. */
void __init ip6_route_init(void)
{
	struct proc_dir_entry *p;

	ip6_dst_ops.kmem_cachep = kmem_cache_create("ip6_dst_cache",
						     sizeof(struct rt6_info),
						     0, SLAB_HWCACHE_ALIGN,
						     NULL, NULL);
	if (!ip6_dst_ops.kmem_cachep)
		panic("cannot create ip6_dst_cache");

	fib6_init();
#ifdef 	CONFIG_PROC_FS
	p = proc_net_create("ipv6_route", 0, rt6_proc_info);
	if (p)
		p->owner = THIS_MODULE;

	proc_net_fops_create("rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
#endif
#ifdef CONFIG_XFRM
	xfrm6_init();
#endif
}
2125 
/* Module teardown: unregister /proc entries and xfrm hooks, purge all
 * routes (rt6_ifdown(NULL) matches every device), stop the FIB GC and
 * destroy the dst slab cache. */
void ip6_route_cleanup(void)
{
#ifdef CONFIG_PROC_FS
	proc_net_remove("ipv6_route");
	proc_net_remove("rt6_stats");
#endif
#ifdef CONFIG_XFRM
	xfrm6_fini();
#endif
	rt6_ifdown(NULL);
	fib6_gc_cleanup();
	kmem_cache_destroy(ip6_dst_ops.kmem_cachep);
}
2139