xref: /linux/net/ipv6/route.c (revision 2b8232ce512105e28453f301d1510de8363bccd1)
1 /*
2  *	Linux INET6 implementation
3  *	FIB front-end.
4  *
5  *	Authors:
6  *	Pedro Roque		<roque@di.fc.ul.pt>
7  *
8  *	$Id: route.c,v 1.56 2001/10/31 21:55:55 davem Exp $
9  *
10  *	This program is free software; you can redistribute it and/or
11  *      modify it under the terms of the GNU General Public License
12  *      as published by the Free Software Foundation; either version
13  *      2 of the License, or (at your option) any later version.
14  */
15 
16 /*	Changes:
17  *
18  *	YOSHIFUJI Hideaki @USAGI
19  *		reworked default router selection.
20  *		- respect outgoing interface
21  *		- select from (probably) reachable routers (i.e.
22  *		routers in REACHABLE, STALE, DELAY or PROBE states).
23  *		- always select the same router if it is (probably)
24  *		reachable.  otherwise, round-robin the list.
25  *	Ville Nuorvala
26  *		Fixed routing subtrees.
27  */
28 
29 #include <linux/capability.h>
30 #include <linux/errno.h>
31 #include <linux/types.h>
32 #include <linux/times.h>
33 #include <linux/socket.h>
34 #include <linux/sockios.h>
35 #include <linux/net.h>
36 #include <linux/route.h>
37 #include <linux/netdevice.h>
38 #include <linux/in6.h>
39 #include <linux/init.h>
40 #include <linux/if_arp.h>
41 
42 #ifdef 	CONFIG_PROC_FS
43 #include <linux/proc_fs.h>
44 #include <linux/seq_file.h>
45 #endif
46 
47 #include <net/net_namespace.h>
48 #include <net/snmp.h>
49 #include <net/ipv6.h>
50 #include <net/ip6_fib.h>
51 #include <net/ip6_route.h>
52 #include <net/ndisc.h>
53 #include <net/addrconf.h>
54 #include <net/tcp.h>
55 #include <linux/rtnetlink.h>
56 #include <net/dst.h>
57 #include <net/xfrm.h>
58 #include <net/netevent.h>
59 #include <net/netlink.h>
60 
61 #include <asm/uaccess.h>
62 
63 #ifdef CONFIG_SYSCTL
64 #include <linux/sysctl.h>
65 #endif
66 
67 /* Set to 3 to get tracing. */
68 #define RT6_DEBUG 2
69 
70 #if RT6_DEBUG >= 3
71 #define RDBG(x) printk x
72 #define RT6_TRACE(x...) printk(KERN_DEBUG x)
73 #else
74 #define RDBG(x)
75 #define RT6_TRACE(x...) do { ; } while (0)
76 #endif
77 
78 #define CLONE_OFFLINK_ROUTE 0
79 
80 static int ip6_rt_max_size = 4096;
81 static int ip6_rt_gc_min_interval = HZ / 2;
82 static int ip6_rt_gc_timeout = 60*HZ;
83 int ip6_rt_gc_interval = 30*HZ;
84 static int ip6_rt_gc_elasticity = 9;
85 static int ip6_rt_mtu_expires = 10*60*HZ;
86 static int ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
87 
88 static struct rt6_info * ip6_rt_copy(struct rt6_info *ort);
89 static struct dst_entry	*ip6_dst_check(struct dst_entry *dst, u32 cookie);
90 static struct dst_entry *ip6_negative_advice(struct dst_entry *);
91 static void		ip6_dst_destroy(struct dst_entry *);
92 static void		ip6_dst_ifdown(struct dst_entry *,
93 				       struct net_device *dev, int how);
94 static int		 ip6_dst_gc(void);
95 
96 static int		ip6_pkt_discard(struct sk_buff *skb);
97 static int		ip6_pkt_discard_out(struct sk_buff *skb);
98 static void		ip6_link_failure(struct sk_buff *skb);
99 static void		ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
100 
101 #ifdef CONFIG_IPV6_ROUTE_INFO
102 static struct rt6_info *rt6_add_route_info(struct in6_addr *prefix, int prefixlen,
103 					   struct in6_addr *gwaddr, int ifindex,
104 					   unsigned pref);
105 static struct rt6_info *rt6_get_route_info(struct in6_addr *prefix, int prefixlen,
106 					   struct in6_addr *gwaddr, int ifindex);
107 #endif
108 
109 static struct dst_ops ip6_dst_ops = {
110 	.family			=	AF_INET6,
111 	.protocol		=	__constant_htons(ETH_P_IPV6),
112 	.gc			=	ip6_dst_gc,
113 	.gc_thresh		=	1024,
114 	.check			=	ip6_dst_check,
115 	.destroy		=	ip6_dst_destroy,
116 	.ifdown			=	ip6_dst_ifdown,
117 	.negative_advice	=	ip6_negative_advice,
118 	.link_failure		=	ip6_link_failure,
119 	.update_pmtu		=	ip6_rt_update_pmtu,
120 	.entry_size		=	sizeof(struct rt6_info),
121 };
122 
123 static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu)
124 {
125 }
126 
127 static struct dst_ops ip6_dst_blackhole_ops = {
128 	.family			=	AF_INET6,
129 	.protocol		=	__constant_htons(ETH_P_IPV6),
130 	.destroy		=	ip6_dst_destroy,
131 	.check			=	ip6_dst_check,
132 	.update_pmtu		=	ip6_rt_blackhole_update_pmtu,
133 	.entry_size		=	sizeof(struct rt6_info),
134 };
135 
136 struct rt6_info ip6_null_entry = {
137 	.u = {
138 		.dst = {
139 			.__refcnt	= ATOMIC_INIT(1),
140 			.__use		= 1,
141 			.obsolete	= -1,
142 			.error		= -ENETUNREACH,
143 			.metrics	= { [RTAX_HOPLIMIT - 1] = 255, },
144 			.input		= ip6_pkt_discard,
145 			.output		= ip6_pkt_discard_out,
146 			.ops		= &ip6_dst_ops,
147 			.path		= (struct dst_entry*)&ip6_null_entry,
148 		}
149 	},
150 	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
151 	.rt6i_metric	= ~(u32) 0,
152 	.rt6i_ref	= ATOMIC_INIT(1),
153 };
154 
155 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
156 
157 static int ip6_pkt_prohibit(struct sk_buff *skb);
158 static int ip6_pkt_prohibit_out(struct sk_buff *skb);
159 static int ip6_pkt_blk_hole(struct sk_buff *skb);
160 
161 struct rt6_info ip6_prohibit_entry = {
162 	.u = {
163 		.dst = {
164 			.__refcnt	= ATOMIC_INIT(1),
165 			.__use		= 1,
166 			.obsolete	= -1,
167 			.error		= -EACCES,
168 			.metrics	= { [RTAX_HOPLIMIT - 1] = 255, },
169 			.input		= ip6_pkt_prohibit,
170 			.output		= ip6_pkt_prohibit_out,
171 			.ops		= &ip6_dst_ops,
172 			.path		= (struct dst_entry*)&ip6_prohibit_entry,
173 		}
174 	},
175 	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
176 	.rt6i_metric	= ~(u32) 0,
177 	.rt6i_ref	= ATOMIC_INIT(1),
178 };
179 
180 struct rt6_info ip6_blk_hole_entry = {
181 	.u = {
182 		.dst = {
183 			.__refcnt	= ATOMIC_INIT(1),
184 			.__use		= 1,
185 			.obsolete	= -1,
186 			.error		= -EINVAL,
187 			.metrics	= { [RTAX_HOPLIMIT - 1] = 255, },
188 			.input		= ip6_pkt_blk_hole,
189 			.output		= ip6_pkt_blk_hole,
190 			.ops		= &ip6_dst_ops,
191 			.path		= (struct dst_entry*)&ip6_blk_hole_entry,
192 		}
193 	},
194 	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
195 	.rt6i_metric	= ~(u32) 0,
196 	.rt6i_ref	= ATOMIC_INIT(1),
197 };
198 
199 #endif
200 
201 /* allocate dst with ip6_dst_ops */
202 static __inline__ struct rt6_info *ip6_dst_alloc(void)
203 {
204 	return (struct rt6_info *)dst_alloc(&ip6_dst_ops);
205 }
206 
207 static void ip6_dst_destroy(struct dst_entry *dst)
208 {
209 	struct rt6_info *rt = (struct rt6_info *)dst;
210 	struct inet6_dev *idev = rt->rt6i_idev;
211 
212 	if (idev != NULL) {
213 		rt->rt6i_idev = NULL;
214 		in6_dev_put(idev);
215 	}
216 }
217 
218 static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
219 			   int how)
220 {
221 	struct rt6_info *rt = (struct rt6_info *)dst;
222 	struct inet6_dev *idev = rt->rt6i_idev;
223 
224 	if (dev != init_net.loopback_dev && idev != NULL && idev->dev == dev) {
225 		struct inet6_dev *loopback_idev = in6_dev_get(init_net.loopback_dev);
226 		if (loopback_idev != NULL) {
227 			rt->rt6i_idev = loopback_idev;
228 			in6_dev_put(idev);
229 		}
230 	}
231 }
232 
233 static __inline__ int rt6_check_expired(const struct rt6_info *rt)
234 {
235 	return (rt->rt6i_flags & RTF_EXPIRES &&
236 		time_after(jiffies, rt->rt6i_expires));
237 }
238 
239 static inline int rt6_need_strict(struct in6_addr *daddr)
240 {
241 	return (ipv6_addr_type(daddr) &
242 		(IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL));
243 }
244 
245 /*
246  *	Route lookup. The caller is assumed to hold table->tb6_lock.
247  */
248 
249 static __inline__ struct rt6_info *rt6_device_match(struct rt6_info *rt,
250 						    int oif,
251 						    int strict)
252 {
253 	struct rt6_info *local = NULL;
254 	struct rt6_info *sprt;
255 
256 	if (oif) {
257 		for (sprt = rt; sprt; sprt = sprt->u.dst.rt6_next) {
258 			struct net_device *dev = sprt->rt6i_dev;
259 			if (dev->ifindex == oif)
260 				return sprt;
261 			if (dev->flags & IFF_LOOPBACK) {
262 				if (sprt->rt6i_idev == NULL ||
263 				    sprt->rt6i_idev->dev->ifindex != oif) {
264 					if (strict && oif)
265 						continue;
266 					if (local && (!oif ||
267 						      local->rt6i_idev->dev->ifindex == oif))
268 						continue;
269 				}
270 				local = sprt;
271 			}
272 		}
273 
274 		if (local)
275 			return local;
276 
277 		if (strict)
278 			return &ip6_null_entry;
279 	}
280 	return rt;
281 }
282 
283 #ifdef CONFIG_IPV6_ROUTER_PREF
284 static void rt6_probe(struct rt6_info *rt)
285 {
286 	struct neighbour *neigh = rt ? rt->rt6i_nexthop : NULL;
287 	/*
288 	 * Okay, this does not seem to be appropriate
289 	 * for now; however, we need to check whether it
290 	 * really is so, aka Router Reachability Probing.
291 	 *
292 	 * Router Reachability Probe MUST be rate-limited
293 	 * to no more than one per minute.
294 	 */
295 	if (!neigh || (neigh->nud_state & NUD_VALID))
296 		return;
297 	read_lock_bh(&neigh->lock);
298 	if (!(neigh->nud_state & NUD_VALID) &&
299 	    time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
300 		struct in6_addr mcaddr;
301 		struct in6_addr *target;
302 
303 		neigh->updated = jiffies;
304 		read_unlock_bh(&neigh->lock);
305 
306 		target = (struct in6_addr *)&neigh->primary_key;
307 		addrconf_addr_solict_mult(target, &mcaddr);
308 		ndisc_send_ns(rt->rt6i_dev, NULL, target, &mcaddr, NULL);
309 	} else
310 		read_unlock_bh(&neigh->lock);
311 }
312 #else
313 static inline void rt6_probe(struct rt6_info *rt)
314 {
315 	return;
316 }
317 #endif
318 
319 /*
320  * Default Router Selection (RFC 2461 6.3.6)
321  */
322 static inline int rt6_check_dev(struct rt6_info *rt, int oif)
323 {
324 	struct net_device *dev = rt->rt6i_dev;
325 	if (!oif || dev->ifindex == oif)
326 		return 2;
327 	if ((dev->flags & IFF_LOOPBACK) &&
328 	    rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
329 		return 1;
330 	return 0;
331 }
332 
333 static inline int rt6_check_neigh(struct rt6_info *rt)
334 {
335 	struct neighbour *neigh = rt->rt6i_nexthop;
336 	int m = 0;
337 	if (rt->rt6i_flags & RTF_NONEXTHOP ||
338 	    !(rt->rt6i_flags & RTF_GATEWAY))
339 		m = 1;
340 	else if (neigh) {
341 		read_lock_bh(&neigh->lock);
342 		if (neigh->nud_state & NUD_VALID)
343 			m = 2;
344 		else if (!(neigh->nud_state & NUD_FAILED))
345 			m = 1;
346 		read_unlock_bh(&neigh->lock);
347 	}
348 	return m;
349 }
350 
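/*
 * Combine the interface match (rt6_check_dev) in the low bits with the
 * decoded router preference (CONFIG_IPV6_ROUTER_PREF) shifted above it.
 * Neighbour reachability (rt6_check_neigh) is used only as a filter:
 * a score of -1 disqualifies the route when the strict flags require an
 * interface match or a reachable nexthop.
 */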
351 static int rt6_score_route(struct rt6_info *rt, int oif,
352 			   int strict)
353 {
354 	int m, n;
355 
356 	m = rt6_check_dev(rt, oif);
357 	if (!m && (strict & RT6_LOOKUP_F_IFACE))
358 		return -1;
359 #ifdef CONFIG_IPV6_ROUTER_PREF
360 	m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
361 #endif
362 	n = rt6_check_neigh(rt);
363 	if (!n && (strict & RT6_LOOKUP_F_REACHABLE))
364 		return -1;
365 	return m;
366 }
367 
368 static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
369 				   int *mpri, struct rt6_info *match)
370 {
371 	int m;
372 
373 	if (rt6_check_expired(rt))
374 		goto out;
375 
376 	m = rt6_score_route(rt, oif, strict);
377 	if (m < 0)
378 		goto out;
379 
380 	if (m > *mpri) {
381 		if (strict & RT6_LOOKUP_F_REACHABLE)
382 			rt6_probe(match);
383 		*mpri = m;
384 		match = rt;
385 	} else if (strict & RT6_LOOKUP_F_REACHABLE) {
386 		rt6_probe(rt);
387 	}
388 
389 out:
390 	return match;
391 }
392 
393 static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
394 				     struct rt6_info *rr_head,
395 				     u32 metric, int oif, int strict)
396 {
397 	struct rt6_info *rt, *match;
398 	int mpri = -1;
399 
400 	match = NULL;
401 	for (rt = rr_head; rt && rt->rt6i_metric == metric;
402 	     rt = rt->u.dst.rt6_next)
403 		match = find_match(rt, oif, strict, &mpri, match);
404 	for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
405 	     rt = rt->u.dst.rt6_next)
406 		match = find_match(rt, oif, strict, &mpri, match);
407 
408 	return match;
409 }
410 
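/*
 * Pick the best route among the entries sharing the lowest metric in
 * this fib6 node, starting the scan at the round-robin pointer
 * fn->rr_ptr.  If nothing qualifies under RT6_LOOKUP_F_REACHABLE,
 * advance rr_ptr so that subsequent lookups rotate over the
 * equal-metric routes.
 */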
411 static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
412 {
413 	struct rt6_info *match, *rt0;
414 
415 	RT6_TRACE("%s(fn->leaf=%p, oif=%d)\n",
416 		  __FUNCTION__, fn->leaf, oif);
417 
418 	rt0 = fn->rr_ptr;
419 	if (!rt0)
420 		fn->rr_ptr = rt0 = fn->leaf;
421 
422 	match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict);
423 
424 	if (!match &&
425 	    (strict & RT6_LOOKUP_F_REACHABLE)) {
426 		struct rt6_info *next = rt0->u.dst.rt6_next;
427 
428 		/* no entries matched; do round-robin */
429 		if (!next || next->rt6i_metric != rt0->rt6i_metric)
430 			next = fn->leaf;
431 
432 		if (next != rt0)
433 			fn->rr_ptr = next;
434 	}
435 
436 	RT6_TRACE("%s() => %p\n",
437 		  __FUNCTION__, match);
438 
439 	return (match ? match : &ip6_null_entry);
440 }
441 
442 #ifdef CONFIG_IPV6_ROUTE_INFO
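/*
 * Handle a Route Information option received in a Router Advertisement
 * (RFC 4191): validate its length and prefix length, then add, refresh
 * or delete the corresponding RTF_ROUTEINFO route together with the
 * advertised preference and lifetime.
 */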
443 int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
444 		  struct in6_addr *gwaddr)
445 {
446 	struct route_info *rinfo = (struct route_info *) opt;
447 	struct in6_addr prefix_buf, *prefix;
448 	unsigned int pref;
449 	u32 lifetime;
450 	struct rt6_info *rt;
451 
452 	if (len < sizeof(struct route_info)) {
453 		return -EINVAL;
454 	}
455 
456 	/* Sanity check for prefix_len and length */
457 	if (rinfo->length > 3) {
458 		return -EINVAL;
459 	} else if (rinfo->prefix_len > 128) {
460 		return -EINVAL;
461 	} else if (rinfo->prefix_len > 64) {
462 		if (rinfo->length < 2) {
463 			return -EINVAL;
464 		}
465 	} else if (rinfo->prefix_len > 0) {
466 		if (rinfo->length < 1) {
467 			return -EINVAL;
468 		}
469 	}
470 
471 	pref = rinfo->route_pref;
472 	if (pref == ICMPV6_ROUTER_PREF_INVALID)
473 		pref = ICMPV6_ROUTER_PREF_MEDIUM;
474 
475 	lifetime = ntohl(rinfo->lifetime);
476 	if (lifetime == 0xffffffff) {
477 		/* infinity */
478 	} else if (lifetime > 0x7fffffff/HZ) {
479 		/* Avoid arithmetic overflow */
480 		lifetime = 0x7fffffff/HZ - 1;
481 	}
482 
483 	if (rinfo->length == 3)
484 		prefix = (struct in6_addr *)rinfo->prefix;
485 	else {
486 		/* this is safe: prefix_len was validated against rinfo->length above */
487 		ipv6_addr_prefix(&prefix_buf,
488 				 (struct in6_addr *)rinfo->prefix,
489 				 rinfo->prefix_len);
490 		prefix = &prefix_buf;
491 	}
492 
493 	rt = rt6_get_route_info(prefix, rinfo->prefix_len, gwaddr, dev->ifindex);
494 
495 	if (rt && !lifetime) {
496 		ip6_del_rt(rt);
497 		rt = NULL;
498 	}
499 
500 	if (!rt && lifetime)
501 		rt = rt6_add_route_info(prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
502 					pref);
503 	else if (rt)
504 		rt->rt6i_flags = RTF_ROUTEINFO |
505 				 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
506 
507 	if (rt) {
508 		if (lifetime == 0xffffffff) {
509 			rt->rt6i_flags &= ~RTF_EXPIRES;
510 		} else {
511 			rt->rt6i_expires = jiffies + HZ * lifetime;
512 			rt->rt6i_flags |= RTF_EXPIRES;
513 		}
514 		dst_release(&rt->u.dst);
515 	}
516 	return 0;
517 }
518 #endif
519 
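/*
 * BACKTRACK() is used after a lookup that ended on ip6_null_entry: it
 * climbs back up the tree, descending into any source-address subtree
 * hung off a parent node, and jumps to the caller's "restart" label as
 * soon as a node carrying routes (RTN_RTINFO) is found; reaching the
 * tree root jumps to the caller's "out" label instead.
 */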
520 #define BACKTRACK(saddr) \
521 do { \
522 	if (rt == &ip6_null_entry) { \
523 		struct fib6_node *pn; \
524 		while (1) { \
525 			if (fn->fn_flags & RTN_TL_ROOT) \
526 				goto out; \
527 			pn = fn->parent; \
528 			if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn) \
529 				fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr); \
530 			else \
531 				fn = pn; \
532 			if (fn->fn_flags & RTN_RTINFO) \
533 				goto restart; \
534 		} \
535 	} \
536 } while(0)
537 
538 static struct rt6_info *ip6_pol_route_lookup(struct fib6_table *table,
539 					     struct flowi *fl, int flags)
540 {
541 	struct fib6_node *fn;
542 	struct rt6_info *rt;
543 
544 	read_lock_bh(&table->tb6_lock);
545 	fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
546 restart:
547 	rt = fn->leaf;
548 	rt = rt6_device_match(rt, fl->oif, flags);
549 	BACKTRACK(&fl->fl6_src);
550 out:
551 	dst_hold(&rt->u.dst);
552 	read_unlock_bh(&table->tb6_lock);
553 
554 	rt->u.dst.lastuse = jiffies;
555 	rt->u.dst.__use++;
556 
557 	return rt;
558 
559 }
560 
561 struct rt6_info *rt6_lookup(struct in6_addr *daddr, struct in6_addr *saddr,
562 			    int oif, int strict)
563 {
564 	struct flowi fl = {
565 		.oif = oif,
566 		.nl_u = {
567 			.ip6_u = {
568 				.daddr = *daddr,
569 			},
570 		},
571 	};
572 	struct dst_entry *dst;
573 	int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
574 
575 	if (saddr) {
576 		memcpy(&fl.fl6_src, saddr, sizeof(*saddr));
577 		flags |= RT6_LOOKUP_F_HAS_SADDR;
578 	}
579 
580 	dst = fib6_rule_lookup(&fl, flags, ip6_pol_route_lookup);
581 	if (dst->error == 0)
582 		return (struct rt6_info *) dst;
583 
584 	dst_release(dst);
585 
586 	return NULL;
587 }
588 
589 EXPORT_SYMBOL(rt6_lookup);
590 
591 /* ip6_ins_rt is called with table->tb6_lock NOT held.
592    It takes a new route entry; if the addition fails for any reason
593    the route is freed. In any case, if the caller does not hold a
594    reference to it, it may be destroyed.
595  */
596 
597 static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
598 {
599 	int err;
600 	struct fib6_table *table;
601 
602 	table = rt->rt6i_table;
603 	write_lock_bh(&table->tb6_lock);
604 	err = fib6_add(&table->tb6_root, rt, info);
605 	write_unlock_bh(&table->tb6_lock);
606 
607 	return err;
608 }
609 
610 int ip6_ins_rt(struct rt6_info *rt)
611 {
612 	return __ip6_ins_rt(rt, NULL);
613 }
614 
615 static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *daddr,
616 				      struct in6_addr *saddr)
617 {
618 	struct rt6_info *rt;
619 
620 	/*
621 	 *	Clone the route.
622 	 */
623 
624 	rt = ip6_rt_copy(ort);
625 
626 	if (rt) {
627 		if (!(rt->rt6i_flags&RTF_GATEWAY)) {
628 			if (rt->rt6i_dst.plen != 128 &&
629 			    ipv6_addr_equal(&rt->rt6i_dst.addr, daddr))
630 				rt->rt6i_flags |= RTF_ANYCAST;
631 			ipv6_addr_copy(&rt->rt6i_gateway, daddr);
632 		}
633 
634 		ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
635 		rt->rt6i_dst.plen = 128;
636 		rt->rt6i_flags |= RTF_CACHE;
637 		rt->u.dst.flags |= DST_HOST;
638 
639 #ifdef CONFIG_IPV6_SUBTREES
640 		if (rt->rt6i_src.plen && saddr) {
641 			ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
642 			rt->rt6i_src.plen = 128;
643 		}
644 #endif
645 
646 		rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
647 
648 	}
649 
650 	return rt;
651 }
652 
653 static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, struct in6_addr *daddr)
654 {
655 	struct rt6_info *rt = ip6_rt_copy(ort);
656 	if (rt) {
657 		ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
658 		rt->rt6i_dst.plen = 128;
659 		rt->rt6i_flags |= RTF_CACHE;
660 		rt->u.dst.flags |= DST_HOST;
661 		rt->rt6i_nexthop = neigh_clone(ort->rt6i_nexthop);
662 	}
663 	return rt;
664 }
665 
666 static struct rt6_info *ip6_pol_route_input(struct fib6_table *table,
667 					    struct flowi *fl, int flags)
668 {
669 	struct fib6_node *fn;
670 	struct rt6_info *rt, *nrt;
671 	int strict = 0;
672 	int attempts = 3;
673 	int err;
674 	int reachable = ipv6_devconf.forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
675 
676 	strict |= flags & RT6_LOOKUP_F_IFACE;
677 
678 relookup:
679 	read_lock_bh(&table->tb6_lock);
680 
681 restart_2:
682 	fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
683 
684 restart:
685 	rt = rt6_select(fn, fl->iif, strict | reachable);
686 	BACKTRACK(&fl->fl6_src);
687 	if (rt == &ip6_null_entry ||
688 	    rt->rt6i_flags & RTF_CACHE)
689 		goto out;
690 
691 	dst_hold(&rt->u.dst);
692 	read_unlock_bh(&table->tb6_lock);
693 
694 	if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
695 		nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src);
696 	else {
697 #if CLONE_OFFLINK_ROUTE
698 		nrt = rt6_alloc_clone(rt, &fl->fl6_dst);
699 #else
700 		goto out2;
701 #endif
702 	}
703 
704 	dst_release(&rt->u.dst);
705 	rt = nrt ? : &ip6_null_entry;
706 
707 	dst_hold(&rt->u.dst);
708 	if (nrt) {
709 		err = ip6_ins_rt(nrt);
710 		if (!err)
711 			goto out2;
712 	}
713 
714 	if (--attempts <= 0)
715 		goto out2;
716 
717 	/*
718 	 * Race condition! In the gap while table->tb6_lock was
719 	 * released, someone could have inserted this route.  Relookup.
720 	 */
721 	dst_release(&rt->u.dst);
722 	goto relookup;
723 
724 out:
725 	if (reachable) {
726 		reachable = 0;
727 		goto restart_2;
728 	}
729 	dst_hold(&rt->u.dst);
730 	read_unlock_bh(&table->tb6_lock);
731 out2:
732 	rt->u.dst.lastuse = jiffies;
733 	rt->u.dst.__use++;
734 
735 	return rt;
736 }
737 
738 void ip6_route_input(struct sk_buff *skb)
739 {
740 	struct ipv6hdr *iph = ipv6_hdr(skb);
741 	int flags = RT6_LOOKUP_F_HAS_SADDR;
742 	struct flowi fl = {
743 		.iif = skb->dev->ifindex,
744 		.nl_u = {
745 			.ip6_u = {
746 				.daddr = iph->daddr,
747 				.saddr = iph->saddr,
748 				.flowlabel = (* (__be32 *) iph)&IPV6_FLOWINFO_MASK,
749 			},
750 		},
751 		.mark = skb->mark,
752 		.proto = iph->nexthdr,
753 	};
754 
755 	if (rt6_need_strict(&iph->daddr))
756 		flags |= RT6_LOOKUP_F_IFACE;
757 
758 	skb->dst = fib6_rule_lookup(&fl, flags, ip6_pol_route_input);
759 }
760 
761 static struct rt6_info *ip6_pol_route_output(struct fib6_table *table,
762 					     struct flowi *fl, int flags)
763 {
764 	struct fib6_node *fn;
765 	struct rt6_info *rt, *nrt;
766 	int strict = 0;
767 	int attempts = 3;
768 	int err;
769 	int reachable = ipv6_devconf.forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
770 
771 	strict |= flags & RT6_LOOKUP_F_IFACE;
772 
773 relookup:
774 	read_lock_bh(&table->tb6_lock);
775 
776 restart_2:
777 	fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
778 
779 restart:
780 	rt = rt6_select(fn, fl->oif, strict | reachable);
781 	BACKTRACK(&fl->fl6_src);
782 	if (rt == &ip6_null_entry ||
783 	    rt->rt6i_flags & RTF_CACHE)
784 		goto out;
785 
786 	dst_hold(&rt->u.dst);
787 	read_unlock_bh(&table->tb6_lock);
788 
789 	if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
790 		nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src);
791 	else {
792 #if CLONE_OFFLINK_ROUTE
793 		nrt = rt6_alloc_clone(rt, &fl->fl6_dst);
794 #else
795 		goto out2;
796 #endif
797 	}
798 
799 	dst_release(&rt->u.dst);
800 	rt = nrt ? : &ip6_null_entry;
801 
802 	dst_hold(&rt->u.dst);
803 	if (nrt) {
804 		err = ip6_ins_rt(nrt);
805 		if (!err)
806 			goto out2;
807 	}
808 
809 	if (--attempts <= 0)
810 		goto out2;
811 
812 	/*
813 	 * Race condition! In the gap while table->tb6_lock was
814 	 * released, someone could have inserted this route.  Relookup.
815 	 */
816 	dst_release(&rt->u.dst);
817 	goto relookup;
818 
819 out:
820 	if (reachable) {
821 		reachable = 0;
822 		goto restart_2;
823 	}
824 	dst_hold(&rt->u.dst);
825 	read_unlock_bh(&table->tb6_lock);
826 out2:
827 	rt->u.dst.lastuse = jiffies;
828 	rt->u.dst.__use++;
829 	return rt;
830 }
831 
832 struct dst_entry * ip6_route_output(struct sock *sk, struct flowi *fl)
833 {
834 	int flags = 0;
835 
836 	if (rt6_need_strict(&fl->fl6_dst))
837 		flags |= RT6_LOOKUP_F_IFACE;
838 
839 	if (!ipv6_addr_any(&fl->fl6_src))
840 		flags |= RT6_LOOKUP_F_HAS_SADDR;
841 
842 	return fib6_rule_lookup(fl, flags, ip6_pol_route_output);
843 }
844 
845 EXPORT_SYMBOL(ip6_route_output);
846 
847 static int ip6_blackhole_output(struct sk_buff *skb)
848 {
849 	kfree_skb(skb);
850 	return 0;
851 }
852 
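/*
 * Replace *dstp with a detached copy of the route whose input and
 * output handlers simply free the skb, so anything sent over it is
 * silently discarded while the cached metrics remain usable.
 */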
853 int ip6_dst_blackhole(struct sock *sk, struct dst_entry **dstp, struct flowi *fl)
854 {
855 	struct rt6_info *ort = (struct rt6_info *) *dstp;
856 	struct rt6_info *rt = (struct rt6_info *)
857 		dst_alloc(&ip6_dst_blackhole_ops);
858 	struct dst_entry *new = NULL;
859 
860 	if (rt) {
861 		new = &rt->u.dst;
862 
863 		atomic_set(&new->__refcnt, 1);
864 		new->__use = 1;
865 		new->input = ip6_blackhole_output;
866 		new->output = ip6_blackhole_output;
867 
868 		memcpy(new->metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
869 		new->dev = ort->u.dst.dev;
870 		if (new->dev)
871 			dev_hold(new->dev);
872 		rt->rt6i_idev = ort->rt6i_idev;
873 		if (rt->rt6i_idev)
874 			in6_dev_hold(rt->rt6i_idev);
875 		rt->rt6i_expires = 0;
876 
877 		ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
878 		rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
879 		rt->rt6i_metric = 0;
880 
881 		memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
882 #ifdef CONFIG_IPV6_SUBTREES
883 		memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
884 #endif
885 
886 		dst_free(new);
887 	}
888 
889 	dst_release(*dstp);
890 	*dstp = new;
891 	return (new ? 0 : -ENOMEM);
892 }
893 EXPORT_SYMBOL_GPL(ip6_dst_blackhole);
894 
895 /*
896  *	Destination cache support functions
897  */
898 
899 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
900 {
901 	struct rt6_info *rt;
902 
903 	rt = (struct rt6_info *) dst;
904 
905 	if (rt && rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
906 		return dst;
907 
908 	return NULL;
909 }
910 
911 static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
912 {
913 	struct rt6_info *rt = (struct rt6_info *) dst;
914 
915 	if (rt) {
916 		if (rt->rt6i_flags & RTF_CACHE)
917 			ip6_del_rt(rt);
918 		else
919 			dst_release(dst);
920 	}
921 	return NULL;
922 }
923 
924 static void ip6_link_failure(struct sk_buff *skb)
925 {
926 	struct rt6_info *rt;
927 
928 	icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0, skb->dev);
929 
930 	rt = (struct rt6_info *) skb->dst;
931 	if (rt) {
932 		if (rt->rt6i_flags&RTF_CACHE) {
933 			dst_set_expires(&rt->u.dst, 0);
934 			rt->rt6i_flags |= RTF_EXPIRES;
935 		} else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
936 			rt->rt6i_node->fn_sernum = -1;
937 	}
938 }
939 
940 static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
941 {
942 	struct rt6_info *rt6 = (struct rt6_info*)dst;
943 
944 	if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
945 		rt6->rt6i_flags |= RTF_MODIFIED;
946 		if (mtu < IPV6_MIN_MTU) {
947 			mtu = IPV6_MIN_MTU;
948 			dst->metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
949 		}
950 		dst->metrics[RTAX_MTU-1] = mtu;
951 		call_netevent_notifiers(NETEVENT_PMTU_UPDATE, dst);
952 	}
953 }
954 
955 static int ipv6_get_mtu(struct net_device *dev);
956 
957 static inline unsigned int ipv6_advmss(unsigned int mtu)
958 {
959 	mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
960 
961 	if (mtu < ip6_rt_min_advmss)
962 		mtu = ip6_rt_min_advmss;
963 
964 	/*
965 	 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
966 	 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
967 	 * IPV6_MAXPLEN is also valid and means: "any MSS,
968 	 * rely only on pmtu discovery"
969 	 */
970 	if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
971 		mtu = IPV6_MAXPLEN;
972 	return mtu;
973 }
974 
975 static struct dst_entry *ndisc_dst_gc_list;
976 static DEFINE_SPINLOCK(ndisc_lock);
977 
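/*
 * Allocate a throw-away dst entry for Neighbour Discovery replies.
 * Entries are chained on ndisc_dst_gc_list and reclaimed by
 * ndisc_dst_gc() once their reference count drops to zero.
 */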
978 struct dst_entry *ndisc_dst_alloc(struct net_device *dev,
979 				  struct neighbour *neigh,
980 				  struct in6_addr *addr,
981 				  int (*output)(struct sk_buff *))
982 {
983 	struct rt6_info *rt;
984 	struct inet6_dev *idev = in6_dev_get(dev);
985 
986 	if (unlikely(idev == NULL))
987 		return NULL;
988 
989 	rt = ip6_dst_alloc();
990 	if (unlikely(rt == NULL)) {
991 		in6_dev_put(idev);
992 		goto out;
993 	}
994 
995 	dev_hold(dev);
996 	if (neigh)
997 		neigh_hold(neigh);
998 	else
999 		neigh = ndisc_get_neigh(dev, addr);
1000 
1001 	rt->rt6i_dev	  = dev;
1002 	rt->rt6i_idev     = idev;
1003 	rt->rt6i_nexthop  = neigh;
1004 	atomic_set(&rt->u.dst.__refcnt, 1);
1005 	rt->u.dst.metrics[RTAX_HOPLIMIT-1] = 255;
1006 	rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
1007 	rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
1008 	rt->u.dst.output  = output;
1009 
1010 #if 0	/* there's no chance to use these for ndisc */
1011 	rt->u.dst.flags   = ipv6_addr_type(addr) & IPV6_ADDR_UNICAST
1012 				? DST_HOST
1013 				: 0;
1014 	ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1015 	rt->rt6i_dst.plen = 128;
1016 #endif
1017 
1018 	spin_lock_bh(&ndisc_lock);
1019 	rt->u.dst.next = ndisc_dst_gc_list;
1020 	ndisc_dst_gc_list = &rt->u.dst;
1021 	spin_unlock_bh(&ndisc_lock);
1022 
1023 	fib6_force_start_gc();
1024 
1025 out:
1026 	return &rt->u.dst;
1027 }
1028 
1029 int ndisc_dst_gc(int *more)
1030 {
1031 	struct dst_entry *dst, *next, **pprev;
1032 	int freed;
1033 
1034 	next = NULL;
1035 	freed = 0;
1036 
1037 	spin_lock_bh(&ndisc_lock);
1038 	pprev = &ndisc_dst_gc_list;
1039 
1040 	while ((dst = *pprev) != NULL) {
1041 		if (!atomic_read(&dst->__refcnt)) {
1042 			*pprev = dst->next;
1043 			dst_free(dst);
1044 			freed++;
1045 		} else {
1046 			pprev = &dst->next;
1047 			(*more)++;
1048 		}
1049 	}
1050 
1051 	spin_unlock_bh(&ndisc_lock);
1052 
1053 	return freed;
1054 }
1055 
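/*
 * Adaptive garbage collection: "expire" shrinks by 1/2^elasticity on
 * every call, making successive sweeps more aggressive while the cache
 * stays over its limits, and is reset to half the GC timeout once the
 * entry count drops below gc_thresh.
 */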
1056 static int ip6_dst_gc(void)
1057 {
1058 	static unsigned expire = 30*HZ;
1059 	static unsigned long last_gc;
1060 	unsigned long now = jiffies;
1061 
1062 	if (time_after(last_gc + ip6_rt_gc_min_interval, now) &&
1063 	    atomic_read(&ip6_dst_ops.entries) <= ip6_rt_max_size)
1064 		goto out;
1065 
1066 	expire++;
1067 	fib6_run_gc(expire);
1068 	last_gc = now;
1069 	if (atomic_read(&ip6_dst_ops.entries) < ip6_dst_ops.gc_thresh)
1070 		expire = ip6_rt_gc_timeout>>1;
1071 
1072 out:
1073 	expire -= expire>>ip6_rt_gc_elasticity;
1074 	return (atomic_read(&ip6_dst_ops.entries) > ip6_rt_max_size);
1075 }
1076 
1077 /* Clean host part of a prefix. Not necessary in radix tree,
1078    but results in cleaner routing tables.
1079 
1080    Remove it only once everything is known to work!
1081  */
1082 
1083 static int ipv6_get_mtu(struct net_device *dev)
1084 {
1085 	int mtu = IPV6_MIN_MTU;
1086 	struct inet6_dev *idev;
1087 
1088 	idev = in6_dev_get(dev);
1089 	if (idev) {
1090 		mtu = idev->cnf.mtu6;
1091 		in6_dev_put(idev);
1092 	}
1093 	return mtu;
1094 }
1095 
1096 int ipv6_get_hoplimit(struct net_device *dev)
1097 {
1098 	int hoplimit = ipv6_devconf.hop_limit;
1099 	struct inet6_dev *idev;
1100 
1101 	idev = in6_dev_get(dev);
1102 	if (idev) {
1103 		hoplimit = idev->cnf.hop_limit;
1104 		in6_dev_put(idev);
1105 	}
1106 	return hoplimit;
1107 }
1108 
1109 /*
1110  *	Add a route described by a struct fib6_config to the FIB.
1111  */
1112 
1113 int ip6_route_add(struct fib6_config *cfg)
1114 {
1115 	int err;
1116 	struct rt6_info *rt = NULL;
1117 	struct net_device *dev = NULL;
1118 	struct inet6_dev *idev = NULL;
1119 	struct fib6_table *table;
1120 	int addr_type;
1121 
1122 	if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
1123 		return -EINVAL;
1124 #ifndef CONFIG_IPV6_SUBTREES
1125 	if (cfg->fc_src_len)
1126 		return -EINVAL;
1127 #endif
1128 	if (cfg->fc_ifindex) {
1129 		err = -ENODEV;
1130 		dev = dev_get_by_index(&init_net, cfg->fc_ifindex);
1131 		if (!dev)
1132 			goto out;
1133 		idev = in6_dev_get(dev);
1134 		if (!idev)
1135 			goto out;
1136 	}
1137 
1138 	if (cfg->fc_metric == 0)
1139 		cfg->fc_metric = IP6_RT_PRIO_USER;
1140 
1141 	table = fib6_new_table(cfg->fc_table);
1142 	if (table == NULL) {
1143 		err = -ENOBUFS;
1144 		goto out;
1145 	}
1146 
1147 	rt = ip6_dst_alloc();
1148 
1149 	if (rt == NULL) {
1150 		err = -ENOMEM;
1151 		goto out;
1152 	}
1153 
1154 	rt->u.dst.obsolete = -1;
1155 	rt->rt6i_expires = jiffies + clock_t_to_jiffies(cfg->fc_expires);
1156 
1157 	if (cfg->fc_protocol == RTPROT_UNSPEC)
1158 		cfg->fc_protocol = RTPROT_BOOT;
1159 	rt->rt6i_protocol = cfg->fc_protocol;
1160 
1161 	addr_type = ipv6_addr_type(&cfg->fc_dst);
1162 
1163 	if (addr_type & IPV6_ADDR_MULTICAST)
1164 		rt->u.dst.input = ip6_mc_input;
1165 	else
1166 		rt->u.dst.input = ip6_forward;
1167 
1168 	rt->u.dst.output = ip6_output;
1169 
1170 	ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1171 	rt->rt6i_dst.plen = cfg->fc_dst_len;
1172 	if (rt->rt6i_dst.plen == 128)
1173 	       rt->u.dst.flags = DST_HOST;
1174 
1175 #ifdef CONFIG_IPV6_SUBTREES
1176 	ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1177 	rt->rt6i_src.plen = cfg->fc_src_len;
1178 #endif
1179 
1180 	rt->rt6i_metric = cfg->fc_metric;
1181 
1182 	/* We cannot add true routes via loopback here,
1183 	   they would result in kernel looping; promote them to reject routes
1184 	 */
1185 	if ((cfg->fc_flags & RTF_REJECT) ||
1186 	    (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK))) {
1187 		/* hold loopback dev/idev if we haven't done so. */
1188 		if (dev != init_net.loopback_dev) {
1189 			if (dev) {
1190 				dev_put(dev);
1191 				in6_dev_put(idev);
1192 			}
1193 			dev = init_net.loopback_dev;
1194 			dev_hold(dev);
1195 			idev = in6_dev_get(dev);
1196 			if (!idev) {
1197 				err = -ENODEV;
1198 				goto out;
1199 			}
1200 		}
1201 		rt->u.dst.output = ip6_pkt_discard_out;
1202 		rt->u.dst.input = ip6_pkt_discard;
1203 		rt->u.dst.error = -ENETUNREACH;
1204 		rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1205 		goto install_route;
1206 	}
1207 
1208 	if (cfg->fc_flags & RTF_GATEWAY) {
1209 		struct in6_addr *gw_addr;
1210 		int gwa_type;
1211 
1212 		gw_addr = &cfg->fc_gateway;
1213 		ipv6_addr_copy(&rt->rt6i_gateway, gw_addr);
1214 		gwa_type = ipv6_addr_type(gw_addr);
1215 
1216 		if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1217 			struct rt6_info *grt;
1218 
1219 			/* IPv6 strictly prohibits using non-link-local
1220 			   addresses as nexthop addresses.
1221 			   Otherwise, the router will not be able to send redirects.
1222 			   That is very good, but in some (rare!) circumstances
1223 			   (SIT, PtP, NBMA NOARP links) it is handy to allow
1224 			   some exceptions. --ANK
1225 			 */
1226 			err = -EINVAL;
1227 			if (!(gwa_type&IPV6_ADDR_UNICAST))
1228 				goto out;
1229 
1230 			grt = rt6_lookup(gw_addr, NULL, cfg->fc_ifindex, 1);
1231 
1232 			err = -EHOSTUNREACH;
1233 			if (grt == NULL)
1234 				goto out;
1235 			if (dev) {
1236 				if (dev != grt->rt6i_dev) {
1237 					dst_release(&grt->u.dst);
1238 					goto out;
1239 				}
1240 			} else {
1241 				dev = grt->rt6i_dev;
1242 				idev = grt->rt6i_idev;
1243 				dev_hold(dev);
1244 				in6_dev_hold(grt->rt6i_idev);
1245 			}
1246 			if (!(grt->rt6i_flags&RTF_GATEWAY))
1247 				err = 0;
1248 			dst_release(&grt->u.dst);
1249 
1250 			if (err)
1251 				goto out;
1252 		}
1253 		err = -EINVAL;
1254 		if (dev == NULL || (dev->flags&IFF_LOOPBACK))
1255 			goto out;
1256 	}
1257 
1258 	err = -ENODEV;
1259 	if (dev == NULL)
1260 		goto out;
1261 
1262 	if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
1263 		rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
1264 		if (IS_ERR(rt->rt6i_nexthop)) {
1265 			err = PTR_ERR(rt->rt6i_nexthop);
1266 			rt->rt6i_nexthop = NULL;
1267 			goto out;
1268 		}
1269 	}
1270 
1271 	rt->rt6i_flags = cfg->fc_flags;
1272 
1273 install_route:
1274 	if (cfg->fc_mx) {
1275 		struct nlattr *nla;
1276 		int remaining;
1277 
1278 		nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
1279 			int type = nla_type(nla);
1280 
1281 			if (type) {
1282 				if (type > RTAX_MAX) {
1283 					err = -EINVAL;
1284 					goto out;
1285 				}
1286 
1287 				rt->u.dst.metrics[type - 1] = nla_get_u32(nla);
1288 			}
1289 		}
1290 	}
1291 
1292 	if (rt->u.dst.metrics[RTAX_HOPLIMIT-1] == 0)
1293 		rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
1294 	if (!rt->u.dst.metrics[RTAX_MTU-1])
1295 		rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(dev);
1296 	if (!rt->u.dst.metrics[RTAX_ADVMSS-1])
1297 		rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
1298 	rt->u.dst.dev = dev;
1299 	rt->rt6i_idev = idev;
1300 	rt->rt6i_table = table;
1301 	return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
1302 
1303 out:
1304 	if (dev)
1305 		dev_put(dev);
1306 	if (idev)
1307 		in6_dev_put(idev);
1308 	if (rt)
1309 		dst_free(&rt->u.dst);
1310 	return err;
1311 }
1312 
1313 static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1314 {
1315 	int err;
1316 	struct fib6_table *table;
1317 
1318 	if (rt == &ip6_null_entry)
1319 		return -ENOENT;
1320 
1321 	table = rt->rt6i_table;
1322 	write_lock_bh(&table->tb6_lock);
1323 
1324 	err = fib6_del(rt, info);
1325 	dst_release(&rt->u.dst);
1326 
1327 	write_unlock_bh(&table->tb6_lock);
1328 
1329 	return err;
1330 }
1331 
1332 int ip6_del_rt(struct rt6_info *rt)
1333 {
1334 	return __ip6_del_rt(rt, NULL);
1335 }
1336 
1337 static int ip6_route_del(struct fib6_config *cfg)
1338 {
1339 	struct fib6_table *table;
1340 	struct fib6_node *fn;
1341 	struct rt6_info *rt;
1342 	int err = -ESRCH;
1343 
1344 	table = fib6_get_table(cfg->fc_table);
1345 	if (table == NULL)
1346 		return err;
1347 
1348 	read_lock_bh(&table->tb6_lock);
1349 
1350 	fn = fib6_locate(&table->tb6_root,
1351 			 &cfg->fc_dst, cfg->fc_dst_len,
1352 			 &cfg->fc_src, cfg->fc_src_len);
1353 
1354 	if (fn) {
1355 		for (rt = fn->leaf; rt; rt = rt->u.dst.rt6_next) {
1356 			if (cfg->fc_ifindex &&
1357 			    (rt->rt6i_dev == NULL ||
1358 			     rt->rt6i_dev->ifindex != cfg->fc_ifindex))
1359 				continue;
1360 			if (cfg->fc_flags & RTF_GATEWAY &&
1361 			    !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1362 				continue;
1363 			if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1364 				continue;
1365 			dst_hold(&rt->u.dst);
1366 			read_unlock_bh(&table->tb6_lock);
1367 
1368 			return __ip6_del_rt(rt, &cfg->fc_nlinfo);
1369 		}
1370 	}
1371 	read_unlock_bh(&table->tb6_lock);
1372 
1373 	return err;
1374 }
1375 
1376 /*
1377  *	Handle redirects
1378  */
1379 struct ip6rd_flowi {
1380 	struct flowi fl;
1381 	struct in6_addr gateway;
1382 };
1383 
1384 static struct rt6_info *__ip6_route_redirect(struct fib6_table *table,
1385 					     struct flowi *fl,
1386 					     int flags)
1387 {
1388 	struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl;
1389 	struct rt6_info *rt;
1390 	struct fib6_node *fn;
1391 
1392 	/*
1393 	 * Get the "current" route for this destination and
1394 	 * check if the redirect has come from the appropriate router.
1395 	 *
1396 	 * RFC 2461 specifies that redirects should only be
1397 	 * accepted if they come from the nexthop to the target.
1398 	 * Due to the way the routes are chosen, this notion
1399 	 * is a bit fuzzy and one might need to check all possible
1400 	 * routes.
1401 	 */
1402 
1403 	read_lock_bh(&table->tb6_lock);
1404 	fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
1405 restart:
1406 	for (rt = fn->leaf; rt; rt = rt->u.dst.rt6_next) {
1407 		/*
1408 		 * Current route is on-link; redirect is always invalid.
1409 		 *
1410 		 * It seems the previous statement is not true. It could
1411 		 * be a node which regards us as on-link (e.g. proxy ndisc).
1412 		 * But then the router serving it might decide that we should
1413 		 * know the truth 8)8) --ANK (980726).
1414 		 */
1415 		if (rt6_check_expired(rt))
1416 			continue;
1417 		if (!(rt->rt6i_flags & RTF_GATEWAY))
1418 			continue;
1419 		if (fl->oif != rt->rt6i_dev->ifindex)
1420 			continue;
1421 		if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
1422 			continue;
1423 		break;
1424 	}
1425 
1426 	if (!rt)
1427 		rt = &ip6_null_entry;
1428 	BACKTRACK(&fl->fl6_src);
1429 out:
1430 	dst_hold(&rt->u.dst);
1431 
1432 	read_unlock_bh(&table->tb6_lock);
1433 
1434 	return rt;
1435 };
1436 
1437 static struct rt6_info *ip6_route_redirect(struct in6_addr *dest,
1438 					   struct in6_addr *src,
1439 					   struct in6_addr *gateway,
1440 					   struct net_device *dev)
1441 {
1442 	int flags = RT6_LOOKUP_F_HAS_SADDR;
1443 	struct ip6rd_flowi rdfl = {
1444 		.fl = {
1445 			.oif = dev->ifindex,
1446 			.nl_u = {
1447 				.ip6_u = {
1448 					.daddr = *dest,
1449 					.saddr = *src,
1450 				},
1451 			},
1452 		},
1453 		.gateway = *gateway,
1454 	};
1455 
1456 	if (rt6_need_strict(dest))
1457 		flags |= RT6_LOOKUP_F_IFACE;
1458 
1459 	return (struct rt6_info *)fib6_rule_lookup((struct flowi *)&rdfl, flags, __ip6_route_redirect);
1460 }
1461 
1462 void rt6_redirect(struct in6_addr *dest, struct in6_addr *src,
1463 		  struct in6_addr *saddr,
1464 		  struct neighbour *neigh, u8 *lladdr, int on_link)
1465 {
1466 	struct rt6_info *rt, *nrt = NULL;
1467 	struct netevent_redirect netevent;
1468 
1469 	rt = ip6_route_redirect(dest, src, saddr, neigh->dev);
1470 
1471 	if (rt == &ip6_null_entry) {
1472 		if (net_ratelimit())
1473 			printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
1474 			       "for redirect target\n");
1475 		goto out;
1476 	}
1477 
1478 	/*
1479 	 *	We have finally decided to accept it.
1480 	 */
1481 
1482 	neigh_update(neigh, lladdr, NUD_STALE,
1483 		     NEIGH_UPDATE_F_WEAK_OVERRIDE|
1484 		     NEIGH_UPDATE_F_OVERRIDE|
1485 		     (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1486 				     NEIGH_UPDATE_F_ISROUTER))
1487 		     );
1488 
1489 	/*
1490 	 * Redirect received -> path was valid.
1491 	 * Look, redirects are sent only in response to data packets,
1492 	 * so this nexthop is apparently reachable. --ANK
1493 	 */
1494 	dst_confirm(&rt->u.dst);
1495 
1496 	/* Duplicate redirect: silently ignore. */
1497 	if (neigh == rt->u.dst.neighbour)
1498 		goto out;
1499 
1500 	nrt = ip6_rt_copy(rt);
1501 	if (nrt == NULL)
1502 		goto out;
1503 
1504 	nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1505 	if (on_link)
1506 		nrt->rt6i_flags &= ~RTF_GATEWAY;
1507 
1508 	ipv6_addr_copy(&nrt->rt6i_dst.addr, dest);
1509 	nrt->rt6i_dst.plen = 128;
1510 	nrt->u.dst.flags |= DST_HOST;
1511 
1512 	ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
1513 	nrt->rt6i_nexthop = neigh_clone(neigh);
1514 	/* Reset pmtu, it may be better */
1515 	nrt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(neigh->dev);
1516 	nrt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&nrt->u.dst));
1517 
1518 	if (ip6_ins_rt(nrt))
1519 		goto out;
1520 
1521 	netevent.old = &rt->u.dst;
1522 	netevent.new = &nrt->u.dst;
1523 	call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1524 
1525 	if (rt->rt6i_flags&RTF_CACHE) {
1526 		ip6_del_rt(rt);
1527 		return;
1528 	}
1529 
1530 out:
1531 	dst_release(&rt->u.dst);
1532 	return;
1533 }
1534 
1535 /*
1536  *	Handle ICMP "packet too big" messages
1537  *	i.e. Path MTU discovery
1538  */
1539 
1540 void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
1541 			struct net_device *dev, u32 pmtu)
1542 {
1543 	struct rt6_info *rt, *nrt;
1544 	int allfrag = 0;
1545 
1546 	rt = rt6_lookup(daddr, saddr, dev->ifindex, 0);
1547 	if (rt == NULL)
1548 		return;
1549 
1550 	if (pmtu >= dst_mtu(&rt->u.dst))
1551 		goto out;
1552 
1553 	if (pmtu < IPV6_MIN_MTU) {
1554 		/*
1555 		 * According to RFC 2460, PMTU is set to the IPv6 Minimum Link
1556 		 * MTU (1280) and a fragment header should always be included
1557 		 * once a node receives a Packet Too Big message reporting a
1558 		 * PMTU less than the IPv6 Minimum Link MTU.
1559 		 */
1560 		pmtu = IPV6_MIN_MTU;
1561 		allfrag = 1;
1562 	}
1563 
1564 	/* New mtu received -> path was valid.
1565 	   Packet Too Big messages are sent only in response to data packets,
1566 	   so this nexthop is apparently reachable. --ANK
1567 	 */
1568 	dst_confirm(&rt->u.dst);
1569 
1570 	/* Host route. If it is static, it would be better
1571 	   not to override it but to add a new one, so that
1572 	   when the cache entry expires the old pmtu
1573 	   is restored automatically.
1574 	 */
1575 	if (rt->rt6i_flags & RTF_CACHE) {
1576 		rt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1577 		if (allfrag)
1578 			rt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1579 		dst_set_expires(&rt->u.dst, ip6_rt_mtu_expires);
1580 		rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
1581 		goto out;
1582 	}
1583 
1584 	/* Network route.
1585 	   Two cases are possible:
1586 	   1. It is a connected route. Action: COW.
1587 	   2. It is a gatewayed or NONEXTHOP route. Action: clone it.
1588 	 */
1589 	if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
1590 		nrt = rt6_alloc_cow(rt, daddr, saddr);
1591 	else
1592 		nrt = rt6_alloc_clone(rt, daddr);
1593 
1594 	if (nrt) {
1595 		nrt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1596 		if (allfrag)
1597 			nrt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1598 
1599 		/* According to RFC 1981, probing for a PMTU increase shouldn't
1600 		 * happen within 5 minutes; the recommended timer is 10 minutes.
1601 		 * Here the route expiration time is set to ip6_rt_mtu_expires,
1602 		 * which is 10 minutes. After 10 minutes the decreased pmtu expires
1603 		 * and detection of a PMTU increase happens automatically.
1604 		 */
1605 		dst_set_expires(&nrt->u.dst, ip6_rt_mtu_expires);
1606 		nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
1607 
1608 		ip6_ins_rt(nrt);
1609 	}
1610 out:
1611 	dst_release(&rt->u.dst);
1612 }
1613 
1614 /*
1615  *	Misc support functions
1616  */
1617 
1618 static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
1619 {
1620 	struct rt6_info *rt = ip6_dst_alloc();
1621 
1622 	if (rt) {
1623 		rt->u.dst.input = ort->u.dst.input;
1624 		rt->u.dst.output = ort->u.dst.output;
1625 
1626 		memcpy(rt->u.dst.metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
1627 		rt->u.dst.error = ort->u.dst.error;
1628 		rt->u.dst.dev = ort->u.dst.dev;
1629 		if (rt->u.dst.dev)
1630 			dev_hold(rt->u.dst.dev);
1631 		rt->rt6i_idev = ort->rt6i_idev;
1632 		if (rt->rt6i_idev)
1633 			in6_dev_hold(rt->rt6i_idev);
1634 		rt->u.dst.lastuse = jiffies;
1635 		rt->rt6i_expires = 0;
1636 
1637 		ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
1638 		rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
1639 		rt->rt6i_metric = 0;
1640 
1641 		memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1642 #ifdef CONFIG_IPV6_SUBTREES
1643 		memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1644 #endif
1645 		rt->rt6i_table = ort->rt6i_table;
1646 	}
1647 	return rt;
1648 }
1649 
1650 #ifdef CONFIG_IPV6_ROUTE_INFO
1651 static struct rt6_info *rt6_get_route_info(struct in6_addr *prefix, int prefixlen,
1652 					   struct in6_addr *gwaddr, int ifindex)
1653 {
1654 	struct fib6_node *fn;
1655 	struct rt6_info *rt = NULL;
1656 	struct fib6_table *table;
1657 
1658 	table = fib6_get_table(RT6_TABLE_INFO);
1659 	if (table == NULL)
1660 		return NULL;
1661 
1662 	write_lock_bh(&table->tb6_lock);
1663 	fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
1664 	if (!fn)
1665 		goto out;
1666 
1667 	for (rt = fn->leaf; rt; rt = rt->u.dst.rt6_next) {
1668 		if (rt->rt6i_dev->ifindex != ifindex)
1669 			continue;
1670 		if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1671 			continue;
1672 		if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1673 			continue;
1674 		dst_hold(&rt->u.dst);
1675 		break;
1676 	}
1677 out:
1678 	write_unlock_bh(&table->tb6_lock);
1679 	return rt;
1680 }
1681 
1682 static struct rt6_info *rt6_add_route_info(struct in6_addr *prefix, int prefixlen,
1683 					   struct in6_addr *gwaddr, int ifindex,
1684 					   unsigned pref)
1685 {
1686 	struct fib6_config cfg = {
1687 		.fc_table	= RT6_TABLE_INFO,
1688 		.fc_metric	= 1024,
1689 		.fc_ifindex	= ifindex,
1690 		.fc_dst_len	= prefixlen,
1691 		.fc_flags	= RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1692 				  RTF_UP | RTF_PREF(pref),
1693 	};
1694 
1695 	ipv6_addr_copy(&cfg.fc_dst, prefix);
1696 	ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
1697 
1698 	/* We should treat it as a default route if prefix length is 0. */
1699 	if (!prefixlen)
1700 		cfg.fc_flags |= RTF_DEFAULT;
1701 
1702 	ip6_route_add(&cfg);
1703 
1704 	return rt6_get_route_info(prefix, prefixlen, gwaddr, ifindex);
1705 }
1706 #endif
1707 
1708 struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *dev)
1709 {
1710 	struct rt6_info *rt;
1711 	struct fib6_table *table;
1712 
1713 	table = fib6_get_table(RT6_TABLE_DFLT);
1714 	if (table == NULL)
1715 		return NULL;
1716 
1717 	write_lock_bh(&table->tb6_lock);
1718 	for (rt = table->tb6_root.leaf; rt; rt=rt->u.dst.rt6_next) {
1719 		if (dev == rt->rt6i_dev &&
1720 		    ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1721 		    ipv6_addr_equal(&rt->rt6i_gateway, addr))
1722 			break;
1723 	}
1724 	if (rt)
1725 		dst_hold(&rt->u.dst);
1726 	write_unlock_bh(&table->tb6_lock);
1727 	return rt;
1728 }
1729 
1730 struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr,
1731 				     struct net_device *dev,
1732 				     unsigned int pref)
1733 {
1734 	struct fib6_config cfg = {
1735 		.fc_table	= RT6_TABLE_DFLT,
1736 		.fc_metric	= 1024,
1737 		.fc_ifindex	= dev->ifindex,
1738 		.fc_flags	= RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
1739 				  RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
1740 	};
1741 
1742 	ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
1743 
1744 	ip6_route_add(&cfg);
1745 
1746 	return rt6_get_dflt_router(gwaddr, dev);
1747 }
1748 
1749 void rt6_purge_dflt_routers(void)
1750 {
1751 	struct rt6_info *rt;
1752 	struct fib6_table *table;
1753 
1754 	/* NOTE: Keep consistent with rt6_get_dflt_router */
1755 	table = fib6_get_table(RT6_TABLE_DFLT);
1756 	if (table == NULL)
1757 		return;
1758 
1759 restart:
1760 	read_lock_bh(&table->tb6_lock);
1761 	for (rt = table->tb6_root.leaf; rt; rt = rt->u.dst.rt6_next) {
1762 		if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
1763 			dst_hold(&rt->u.dst);
1764 			read_unlock_bh(&table->tb6_lock);
1765 			ip6_del_rt(rt);
1766 			goto restart;
1767 		}
1768 	}
1769 	read_unlock_bh(&table->tb6_lock);
1770 }
1771 
1772 static void rtmsg_to_fib6_config(struct in6_rtmsg *rtmsg,
1773 				 struct fib6_config *cfg)
1774 {
1775 	memset(cfg, 0, sizeof(*cfg));
1776 
1777 	cfg->fc_table = RT6_TABLE_MAIN;
1778 	cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
1779 	cfg->fc_metric = rtmsg->rtmsg_metric;
1780 	cfg->fc_expires = rtmsg->rtmsg_info;
1781 	cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
1782 	cfg->fc_src_len = rtmsg->rtmsg_src_len;
1783 	cfg->fc_flags = rtmsg->rtmsg_flags;
1784 
1785 	ipv6_addr_copy(&cfg->fc_dst, &rtmsg->rtmsg_dst);
1786 	ipv6_addr_copy(&cfg->fc_src, &rtmsg->rtmsg_src);
1787 	ipv6_addr_copy(&cfg->fc_gateway, &rtmsg->rtmsg_gateway);
1788 }
1789 
1790 int ipv6_route_ioctl(unsigned int cmd, void __user *arg)
1791 {
1792 	struct fib6_config cfg;
1793 	struct in6_rtmsg rtmsg;
1794 	int err;
1795 
1796 	switch(cmd) {
1797 	case SIOCADDRT:		/* Add a route */
1798 	case SIOCDELRT:		/* Delete a route */
1799 		if (!capable(CAP_NET_ADMIN))
1800 			return -EPERM;
1801 		err = copy_from_user(&rtmsg, arg,
1802 				     sizeof(struct in6_rtmsg));
1803 		if (err)
1804 			return -EFAULT;
1805 
1806 		rtmsg_to_fib6_config(&rtmsg, &cfg);
1807 
1808 		rtnl_lock();
1809 		switch (cmd) {
1810 		case SIOCADDRT:
1811 			err = ip6_route_add(&cfg);
1812 			break;
1813 		case SIOCDELRT:
1814 			err = ip6_route_del(&cfg);
1815 			break;
1816 		default:
1817 			err = -EINVAL;
1818 		}
1819 		rtnl_unlock();
1820 
1821 		return err;
1822 	}
1823 
1824 	return -EINVAL;
1825 }
1826 
1827 /*
1828  *	Drop the packet on the floor
1829  */
1830 
1831 static inline int ip6_pkt_drop(struct sk_buff *skb, int code,
1832 			       int ipstats_mib_noroutes)
1833 {
1834 	int type;
1835 	switch (ipstats_mib_noroutes) {
1836 	case IPSTATS_MIB_INNOROUTES:
1837 		type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
1838 		if (type == IPV6_ADDR_ANY || type == IPV6_ADDR_RESERVED) {
1839 			IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_INADDRERRORS);
1840 			break;
1841 		}
1842 		/* FALLTHROUGH */
1843 	case IPSTATS_MIB_OUTNOROUTES:
1844 		IP6_INC_STATS(ip6_dst_idev(skb->dst), ipstats_mib_noroutes);
1845 		break;
1846 	}
1847 	icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0, skb->dev);
1848 	kfree_skb(skb);
1849 	return 0;
1850 }
1851 
1852 static int ip6_pkt_discard(struct sk_buff *skb)
1853 {
1854 	return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
1855 }
1856 
1857 static int ip6_pkt_discard_out(struct sk_buff *skb)
1858 {
1859 	skb->dev = skb->dst->dev;
1860 	return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
1861 }
1862 
1863 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
1864 
1865 static int ip6_pkt_prohibit(struct sk_buff *skb)
1866 {
1867 	return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
1868 }
1869 
1870 static int ip6_pkt_prohibit_out(struct sk_buff *skb)
1871 {
1872 	skb->dev = skb->dst->dev;
1873 	return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
1874 }
1875 
1876 static int ip6_pkt_blk_hole(struct sk_buff *skb)
1877 {
1878 	kfree_skb(skb);
1879 	return 0;
1880 }
1881 
1882 #endif
1883 
1884 /*
1885  *	Allocate a dst for local (unicast / anycast) address.
1886  */
1887 
1888 struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
1889 				    const struct in6_addr *addr,
1890 				    int anycast)
1891 {
1892 	struct rt6_info *rt = ip6_dst_alloc();
1893 
1894 	if (rt == NULL)
1895 		return ERR_PTR(-ENOMEM);
1896 
1897 	dev_hold(init_net.loopback_dev);
1898 	in6_dev_hold(idev);
1899 
1900 	rt->u.dst.flags = DST_HOST;
1901 	rt->u.dst.input = ip6_input;
1902 	rt->u.dst.output = ip6_output;
1903 	rt->rt6i_dev = init_net.loopback_dev;
1904 	rt->rt6i_idev = idev;
1905 	rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
1906 	rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
1907 	rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
1908 	rt->u.dst.obsolete = -1;
1909 
1910 	rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
1911 	if (anycast)
1912 		rt->rt6i_flags |= RTF_ANYCAST;
1913 	else
1914 		rt->rt6i_flags |= RTF_LOCAL;
1915 	rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
1916 	if (rt->rt6i_nexthop == NULL) {
1917 		dst_free(&rt->u.dst);
1918 		return ERR_PTR(-ENOMEM);
1919 	}
1920 
1921 	ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1922 	rt->rt6i_dst.plen = 128;
1923 	rt->rt6i_table = fib6_get_table(RT6_TABLE_LOCAL);
1924 
1925 	atomic_set(&rt->u.dst.__refcnt, 1);
1926 
1927 	return rt;
1928 }
1929 
1930 static int fib6_ifdown(struct rt6_info *rt, void *arg)
1931 {
1932 	if (((void*)rt->rt6i_dev == arg || arg == NULL) &&
1933 	    rt != &ip6_null_entry) {
1934 		RT6_TRACE("deleted by ifdown %p\n", rt);
1935 		return -1;
1936 	}
1937 	return 0;
1938 }
1939 
1940 void rt6_ifdown(struct net_device *dev)
1941 {
1942 	fib6_clean_all(fib6_ifdown, 0, dev);
1943 }
1944 
1945 struct rt6_mtu_change_arg
1946 {
1947 	struct net_device *dev;
1948 	unsigned mtu;
1949 };
1950 
1951 static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
1952 {
1953 	struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
1954 	struct inet6_dev *idev;
1955 
1956 	/* In IPv6, pmtu discovery is not optional,
1957 	   so the RTAX_MTU lock cannot disable it.
1958 	   We still use this lock to block changes
1959 	   caused by addrconf/ndisc.
1960 	*/
1961 
1962 	idev = __in6_dev_get(arg->dev);
1963 	if (idev == NULL)
1964 		return 0;
1965 
1966 	/* For an administrative MTU increase, there is no way to discover
1967 	   an IPv6 PMTU increase, so the PMTU increase should be updated here.
1968 	   Since RFC 1981 doesn't cover administrative MTU increases,
1969 	   updating the PMTU on increase is a MUST (e.g. for jumbo frames).
1970 	 */
1971 	/*
1972 	   If the new MTU is less than the route PMTU, this new MTU will be
1973 	   the lowest MTU in the path; update the route PMTU to reflect the
1974 	   decrease. If the new MTU is greater than the route PMTU, and the
1975 	   old MTU was the lowest MTU in the path, update the route PMTU to
1976 	   reflect the increase. In this case, if another node's link MTU is
1977 	   still the lowest in the path, a Packet Too Big message will trigger
1978 	   PMTU discovery again.
1979 	 */
1980 	if (rt->rt6i_dev == arg->dev &&
1981 	    !dst_metric_locked(&rt->u.dst, RTAX_MTU) &&
1982 	    (dst_mtu(&rt->u.dst) > arg->mtu ||
1983 	     (dst_mtu(&rt->u.dst) < arg->mtu &&
1984 	      dst_mtu(&rt->u.dst) == idev->cnf.mtu6))) {
1985 		rt->u.dst.metrics[RTAX_MTU-1] = arg->mtu;
1986 		rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(arg->mtu);
1987 	}
1988 	return 0;
1989 }
1990 
1991 void rt6_mtu_change(struct net_device *dev, unsigned mtu)
1992 {
1993 	struct rt6_mtu_change_arg arg = {
1994 		.dev = dev,
1995 		.mtu = mtu,
1996 	};
1997 
1998 	fib6_clean_all(rt6_mtu_change_route, 0, &arg);
1999 }
2000 
2001 static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
2002 	[RTA_GATEWAY]           = { .len = sizeof(struct in6_addr) },
2003 	[RTA_OIF]               = { .type = NLA_U32 },
2004 	[RTA_IIF]		= { .type = NLA_U32 },
2005 	[RTA_PRIORITY]          = { .type = NLA_U32 },
2006 	[RTA_METRICS]           = { .type = NLA_NESTED },
2007 };
2008 
2009 static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2010 			      struct fib6_config *cfg)
2011 {
2012 	struct rtmsg *rtm;
2013 	struct nlattr *tb[RTA_MAX+1];
2014 	int err;
2015 
2016 	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2017 	if (err < 0)
2018 		goto errout;
2019 
2020 	err = -EINVAL;
2021 	rtm = nlmsg_data(nlh);
2022 	memset(cfg, 0, sizeof(*cfg));
2023 
2024 	cfg->fc_table = rtm->rtm_table;
2025 	cfg->fc_dst_len = rtm->rtm_dst_len;
2026 	cfg->fc_src_len = rtm->rtm_src_len;
2027 	cfg->fc_flags = RTF_UP;
2028 	cfg->fc_protocol = rtm->rtm_protocol;
2029 
2030 	if (rtm->rtm_type == RTN_UNREACHABLE)
2031 		cfg->fc_flags |= RTF_REJECT;
2032 
2033 	cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
2034 	cfg->fc_nlinfo.nlh = nlh;
2035 
2036 	if (tb[RTA_GATEWAY]) {
2037 		nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
2038 		cfg->fc_flags |= RTF_GATEWAY;
2039 	}
2040 
2041 	if (tb[RTA_DST]) {
2042 		int plen = (rtm->rtm_dst_len + 7) >> 3;
2043 
2044 		if (nla_len(tb[RTA_DST]) < plen)
2045 			goto errout;
2046 
2047 		nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
2048 	}
2049 
2050 	if (tb[RTA_SRC]) {
2051 		int plen = (rtm->rtm_src_len + 7) >> 3;
2052 
2053 		if (nla_len(tb[RTA_SRC]) < plen)
2054 			goto errout;
2055 
2056 		nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
2057 	}
2058 
2059 	if (tb[RTA_OIF])
2060 		cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2061 
2062 	if (tb[RTA_PRIORITY])
2063 		cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2064 
2065 	if (tb[RTA_METRICS]) {
2066 		cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2067 		cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
2068 	}
2069 
2070 	if (tb[RTA_TABLE])
2071 		cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2072 
2073 	err = 0;
2074 errout:
2075 	return err;
2076 }
2077 
2078 static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
2079 {
2080 	struct fib6_config cfg;
2081 	int err;
2082 
2083 	err = rtm_to_fib6_config(skb, nlh, &cfg);
2084 	if (err < 0)
2085 		return err;
2086 
2087 	return ip6_route_del(&cfg);
2088 }
2089 
2090 static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
2091 {
2092 	struct fib6_config cfg;
2093 	int err;
2094 
2095 	err = rtm_to_fib6_config(skb, nlh, &cfg);
2096 	if (err < 0)
2097 		return err;
2098 
2099 	return ip6_route_add(&cfg);
2100 }
2101 
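/*
 * Upper bound on the size of a single route message; used by
 * inet6_rt_notify() to size the notification skb.
 */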
2102 static inline size_t rt6_nlmsg_size(void)
2103 {
2104 	return NLMSG_ALIGN(sizeof(struct rtmsg))
2105 	       + nla_total_size(16) /* RTA_SRC */
2106 	       + nla_total_size(16) /* RTA_DST */
2107 	       + nla_total_size(16) /* RTA_GATEWAY */
2108 	       + nla_total_size(16) /* RTA_PREFSRC */
2109 	       + nla_total_size(4) /* RTA_TABLE */
2110 	       + nla_total_size(4) /* RTA_IIF */
2111 	       + nla_total_size(4) /* RTA_OIF */
2112 	       + nla_total_size(4) /* RTA_PRIORITY */
2113 	       + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
2114 	       + nla_total_size(sizeof(struct rta_cacheinfo));
2115 }
2116 
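/*
 * Build one route message describing @rt.  Returns 1 when the route is
 * skipped (prefix-only dump and @rt is not a prefix route), -EMSGSIZE when
 * the skb runs out of space, and the value of nlmsg_end() on success.
 */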
2117 static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt,
2118 			 struct in6_addr *dst, struct in6_addr *src,
2119 			 int iif, int type, u32 pid, u32 seq,
2120 			 int prefix, unsigned int flags)
2121 {
2122 	struct rtmsg *rtm;
2123 	struct nlmsghdr *nlh;
2124 	long expires;
2125 	u32 table;
2126 
2127 	if (prefix) {	/* user wants prefix routes only */
2128 		if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
2129 			/* success since this is not a prefix route */
2130 			return 1;
2131 		}
2132 	}
2133 
2134 	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtm), flags);
2135 	if (nlh == NULL)
2136 		return -EMSGSIZE;
2137 
2138 	rtm = nlmsg_data(nlh);
2139 	rtm->rtm_family = AF_INET6;
2140 	rtm->rtm_dst_len = rt->rt6i_dst.plen;
2141 	rtm->rtm_src_len = rt->rt6i_src.plen;
2142 	rtm->rtm_tos = 0;
2143 	if (rt->rt6i_table)
2144 		table = rt->rt6i_table->tb6_id;
2145 	else
2146 		table = RT6_TABLE_UNSPEC;
2147 	rtm->rtm_table = table;
2148 	NLA_PUT_U32(skb, RTA_TABLE, table);
2149 	if (rt->rt6i_flags&RTF_REJECT)
2150 		rtm->rtm_type = RTN_UNREACHABLE;
2151 	else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
2152 		rtm->rtm_type = RTN_LOCAL;
2153 	else
2154 		rtm->rtm_type = RTN_UNICAST;
2155 	rtm->rtm_flags = 0;
2156 	rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2157 	rtm->rtm_protocol = rt->rt6i_protocol;
2158 	if (rt->rt6i_flags&RTF_DYNAMIC)
2159 		rtm->rtm_protocol = RTPROT_REDIRECT;
2160 	else if (rt->rt6i_flags & RTF_ADDRCONF)
2161 		rtm->rtm_protocol = RTPROT_KERNEL;
2162 	else if (rt->rt6i_flags&RTF_DEFAULT)
2163 		rtm->rtm_protocol = RTPROT_RA;
2164 
2165 	if (rt->rt6i_flags&RTF_CACHE)
2166 		rtm->rtm_flags |= RTM_F_CLONED;
2167 
2168 	if (dst) {
2169 		NLA_PUT(skb, RTA_DST, 16, dst);
2170 		rtm->rtm_dst_len = 128;
2171 	} else if (rtm->rtm_dst_len)
2172 		NLA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
2173 #ifdef CONFIG_IPV6_SUBTREES
2174 	if (src) {
2175 		NLA_PUT(skb, RTA_SRC, 16, src);
2176 		rtm->rtm_src_len = 128;
2177 	} else if (rtm->rtm_src_len)
2178 		NLA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
2179 #endif
2180 	if (iif)
2181 		NLA_PUT_U32(skb, RTA_IIF, iif);
2182 	else if (dst) {
2183 		struct in6_addr saddr_buf;
2184 		if (ipv6_get_saddr(&rt->u.dst, dst, &saddr_buf) == 0)
2185 			NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
2186 	}
2187 
2188 	if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0)
2189 		goto nla_put_failure;
2190 
2191 	if (rt->u.dst.neighbour)
2192 		NLA_PUT(skb, RTA_GATEWAY, 16, &rt->u.dst.neighbour->primary_key);
2193 
2194 	if (rt->u.dst.dev)
2195 		NLA_PUT_U32(skb, RTA_OIF, rt->rt6i_dev->ifindex);
2196 
2197 	NLA_PUT_U32(skb, RTA_PRIORITY, rt->rt6i_metric);
2198 
2199 	expires = rt->rt6i_expires ? rt->rt6i_expires - jiffies : 0;
2200 	if (rtnl_put_cacheinfo(skb, &rt->u.dst, 0, 0, 0,
2201 			       expires, rt->u.dst.error) < 0)
2202 		goto nla_put_failure;
2203 
2204 	return nlmsg_end(skb, nlh);
2205 
2206 nla_put_failure:
2207 	nlmsg_cancel(skb, nlh);
2208 	return -EMSGSIZE;
2209 }
2210 
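/*
 * Per-route callback used when dumping a routing table over rtnetlink;
 * honours the RTM_F_PREFIX filter carried in the request.
 */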
2211 int rt6_dump_route(struct rt6_info *rt, void *p_arg)
2212 {
2213 	struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2214 	int prefix;
2215 
2216 	if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2217 		struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
2218 		prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2219 	} else
2220 		prefix = 0;
2221 
2222 	return rt6_fill_node(arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
2223 		     NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
2224 		     prefix, NLM_F_MULTI);
2225 }
2226 
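/*
 * RTM_GETROUTE handler for a single lookup: build a flow from the request
 * attributes, resolve it through ip6_route_output() and unicast the
 * resulting route back to the requester as an RTM_NEWROUTE message.
 */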
2227 static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
2228 {
2229 	struct nlattr *tb[RTA_MAX+1];
2230 	struct rt6_info *rt;
2231 	struct sk_buff *skb;
2232 	struct rtmsg *rtm;
2233 	struct flowi fl;
2234 	int err, iif = 0;
2235 
2236 	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2237 	if (err < 0)
2238 		goto errout;
2239 
2240 	err = -EINVAL;
2241 	memset(&fl, 0, sizeof(fl));
2242 
2243 	if (tb[RTA_SRC]) {
2244 		if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2245 			goto errout;
2246 
2247 		ipv6_addr_copy(&fl.fl6_src, nla_data(tb[RTA_SRC]));
2248 	}
2249 
2250 	if (tb[RTA_DST]) {
2251 		if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2252 			goto errout;
2253 
2254 		ipv6_addr_copy(&fl.fl6_dst, nla_data(tb[RTA_DST]));
2255 	}
2256 
2257 	if (tb[RTA_IIF])
2258 		iif = nla_get_u32(tb[RTA_IIF]);
2259 
2260 	if (tb[RTA_OIF])
2261 		fl.oif = nla_get_u32(tb[RTA_OIF]);
2262 
2263 	if (iif) {
2264 		struct net_device *dev;
2265 		dev = __dev_get_by_index(&init_net, iif);
2266 		if (!dev) {
2267 			err = -ENODEV;
2268 			goto errout;
2269 		}
2270 	}
2271 
2272 	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
2273 	if (skb == NULL) {
2274 		err = -ENOBUFS;
2275 		goto errout;
2276 	}
2277 
2278 	/* Reserve room for dummy headers; this skb can pass
2279 	   through a good chunk of the routing engine.
2280 	 */
2281 	skb_reset_mac_header(skb);
2282 	skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
2283 
2284 	rt = (struct rt6_info*) ip6_route_output(NULL, &fl);
2285 	skb->dst = &rt->u.dst;
2286 
2287 	err = rt6_fill_node(skb, rt, &fl.fl6_dst, &fl.fl6_src, iif,
2288 			    RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
2289 			    nlh->nlmsg_seq, 0, 0);
2290 	if (err < 0) {
2291 		kfree_skb(skb);
2292 		goto errout;
2293 	}
2294 
2295 	err = rtnl_unicast(skb, NETLINK_CB(in_skb).pid);
2296 errout:
2297 	return err;
2298 }
2299 
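/*
 * Notify RTNLGRP_IPV6_ROUTE listeners of a route change.  The skb is sized
 * with rt6_nlmsg_size(), so -EMSGSIZE from rt6_fill_node() indicates a bug.
 */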
2300 void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
2301 {
2302 	struct sk_buff *skb;
2303 	u32 pid = 0, seq = 0;
2304 	struct nlmsghdr *nlh = NULL;
2305 	int err = -ENOBUFS;
2306 
2307 	if (info) {
2308 		pid = info->pid;
2309 		nlh = info->nlh;
2310 		if (nlh)
2311 			seq = nlh->nlmsg_seq;
2312 	}
2313 
2314 	skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
2315 	if (skb == NULL)
2316 		goto errout;
2317 
2318 	err = rt6_fill_node(skb, rt, NULL, NULL, 0, event, pid, seq, 0, 0);
2319 	if (err < 0) {
2320 		/* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2321 		WARN_ON(err == -EMSGSIZE);
2322 		kfree_skb(skb);
2323 		goto errout;
2324 	}
2325 	err = rtnl_notify(skb, pid, RTNLGRP_IPV6_ROUTE, nlh, gfp_any());
2326 errout:
2327 	if (err < 0)
2328 		rtnl_set_sk_err(RTNLGRP_IPV6_ROUTE, err);
2329 }
2330 
2331 /*
2332  *	/proc
2333  */
2334 
2335 #ifdef CONFIG_PROC_FS
2336 
2337 #define RT6_INFO_LEN (32 + 4 + 32 + 4 + 32 + 40 + 5 + 1)
2338 
2339 struct rt6_proc_arg
2340 {
2341 	char *buffer;
2342 	int offset;
2343 	int length;
2344 	int skip;
2345 	int len;
2346 };
2347 
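/*
 * Format one line of /proc/net/ipv6_route for @rt: destination and source
 * prefixes, next hop, metric, reference count, use count, flags and device.
 */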
2348 static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2349 {
2350 	struct rt6_proc_arg *arg = (struct rt6_proc_arg *) p_arg;
2351 
2352 	if (arg->skip < arg->offset / RT6_INFO_LEN) {
2353 		arg->skip++;
2354 		return 0;
2355 	}
2356 
2357 	if (arg->len >= arg->length)
2358 		return 0;
2359 
2360 	arg->len += sprintf(arg->buffer + arg->len,
2361 			    NIP6_SEQFMT " %02x ",
2362 			    NIP6(rt->rt6i_dst.addr),
2363 			    rt->rt6i_dst.plen);
2364 
2365 #ifdef CONFIG_IPV6_SUBTREES
2366 	arg->len += sprintf(arg->buffer + arg->len,
2367 			    NIP6_SEQFMT " %02x ",
2368 			    NIP6(rt->rt6i_src.addr),
2369 			    rt->rt6i_src.plen);
2370 #else
2371 	arg->len += sprintf(arg->buffer + arg->len,
2372 			    "00000000000000000000000000000000 00 ");
2373 #endif
2374 
2375 	if (rt->rt6i_nexthop) {
2376 		arg->len += sprintf(arg->buffer + arg->len,
2377 				    NIP6_SEQFMT,
2378 				    NIP6(*((struct in6_addr *)rt->rt6i_nexthop->primary_key)));
2379 	} else {
2380 		arg->len += sprintf(arg->buffer + arg->len,
2381 				    "00000000000000000000000000000000");
2382 	}
2383 	arg->len += sprintf(arg->buffer + arg->len,
2384 			    " %08x %08x %08x %08x %8s\n",
2385 			    rt->rt6i_metric, atomic_read(&rt->u.dst.__refcnt),
2386 			    rt->u.dst.__use, rt->rt6i_flags,
2387 			    rt->rt6i_dev ? rt->rt6i_dev->name : "");
2388 	return 0;
2389 }
2390 
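/*
 * Old-style get_info backend for /proc/net/ipv6_route: walk all tables via
 * fib6_clean_all() and let rt6_info_route() format each entry.
 */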
2391 static int rt6_proc_info(char *buffer, char **start, off_t offset, int length)
2392 {
2393 	struct rt6_proc_arg arg = {
2394 		.buffer = buffer,
2395 		.offset = offset,
2396 		.length = length,
2397 	};
2398 
2399 	fib6_clean_all(rt6_info_route, 0, &arg);
2400 
2401 	*start = buffer;
2402 	if (offset)
2403 		*start += offset % RT6_INFO_LEN;
2404 
2405 	arg.len -= offset % RT6_INFO_LEN;
2406 
2407 	if (arg.len > length)
2408 		arg.len = length;
2409 	if (arg.len < 0)
2410 		arg.len = 0;
2411 
2412 	return arg.len;
2413 }
2414 
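/*
 * /proc/net/rt6_stats: FIB node and route counters plus the number of
 * currently allocated dst entries, printed as seven hex fields.
 */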
2415 static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2416 {
2417 	seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
2418 		      rt6_stats.fib_nodes, rt6_stats.fib_route_nodes,
2419 		      rt6_stats.fib_rt_alloc, rt6_stats.fib_rt_entries,
2420 		      rt6_stats.fib_rt_cache,
2421 		      atomic_read(&ip6_dst_ops.entries),
2422 		      rt6_stats.fib_discarded_routes);
2423 
2424 	return 0;
2425 }
2426 
2427 static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2428 {
2429 	return single_open(file, rt6_stats_seq_show, NULL);
2430 }
2431 
2432 static const struct file_operations rt6_stats_seq_fops = {
2433 	.owner	 = THIS_MODULE,
2434 	.open	 = rt6_stats_seq_open,
2435 	.read	 = seq_read,
2436 	.llseek	 = seq_lseek,
2437 	.release = single_release,
2438 };
2439 #endif	/* CONFIG_PROC_FS */
2440 
2441 #ifdef CONFIG_SYSCTL
2442 
2443 static int flush_delay;
2444 
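/*
 * Handler for the "flush" sysctl (normally exposed as
 * /proc/sys/net/ipv6/route/flush, e.g. "echo 1 > .../flush"): a write stores
 * the value in flush_delay and kicks fib6_run_gc() immediately; reads fail.
 */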
2445 static
2446 int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write, struct file * filp,
2447 			      void __user *buffer, size_t *lenp, loff_t *ppos)
2448 {
2449 	if (write) {
2450 		proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
2451 		fib6_run_gc(flush_delay <= 0 ? ~0UL : (unsigned long)flush_delay);
2452 		return 0;
2453 	} else
2454 		return -EINVAL;
2455 }
2456 
2457 ctl_table ipv6_route_table[] = {
2458 	{
2459 		.ctl_name	=	NET_IPV6_ROUTE_FLUSH,
2460 		.procname	=	"flush",
2461 		.data		=	&flush_delay,
2462 		.maxlen		=	sizeof(int),
2463 		.mode		=	0200,
2464 		.proc_handler	=	&ipv6_sysctl_rtcache_flush
2465 	},
2466 	{
2467 		.ctl_name	=	NET_IPV6_ROUTE_GC_THRESH,
2468 		.procname	=	"gc_thresh",
2469 		.data		=	&ip6_dst_ops.gc_thresh,
2470 		.maxlen		=	sizeof(int),
2471 		.mode		=	0644,
2472 		.proc_handler	=	&proc_dointvec,
2473 	},
2474 	{
2475 		.ctl_name	=	NET_IPV6_ROUTE_MAX_SIZE,
2476 		.procname	=	"max_size",
2477 		.data		=	&ip6_rt_max_size,
2478 		.maxlen		=	sizeof(int),
2479 		.mode		=	0644,
2480 		.proc_handler	=	&proc_dointvec,
2481 	},
2482 	{
2483 		.ctl_name	=	NET_IPV6_ROUTE_GC_MIN_INTERVAL,
2484 		.procname	=	"gc_min_interval",
2485 		.data		=	&ip6_rt_gc_min_interval,
2486 		.maxlen		=	sizeof(int),
2487 		.mode		=	0644,
2488 		.proc_handler	=	&proc_dointvec_jiffies,
2489 		.strategy	=	&sysctl_jiffies,
2490 	},
2491 	{
2492 		.ctl_name	=	NET_IPV6_ROUTE_GC_TIMEOUT,
2493 		.procname	=	"gc_timeout",
2494 		.data		=	&ip6_rt_gc_timeout,
2495 		.maxlen		=	sizeof(int),
2496 		.mode		=	0644,
2497 		.proc_handler	=	&proc_dointvec_jiffies,
2498 		.strategy	=	&sysctl_jiffies,
2499 	},
2500 	{
2501 		.ctl_name	=	NET_IPV6_ROUTE_GC_INTERVAL,
2502 		.procname	=	"gc_interval",
2503 		.data		=	&ip6_rt_gc_interval,
2504 		.maxlen		=	sizeof(int),
2505 		.mode		=	0644,
2506 		.proc_handler	=	&proc_dointvec_jiffies,
2507 		.strategy	=	&sysctl_jiffies,
2508 	},
2509 	{
2510 		.ctl_name	=	NET_IPV6_ROUTE_GC_ELASTICITY,
2511 		.procname	=	"gc_elasticity",
2512 		.data		=	&ip6_rt_gc_elasticity,
2513 		.maxlen		=	sizeof(int),
2514 		.mode		=	0644,
2515 		.proc_handler	=	&proc_dointvec_jiffies,
2516 		.strategy	=	&sysctl_jiffies,
2517 	},
2518 	{
2519 		.ctl_name	=	NET_IPV6_ROUTE_MTU_EXPIRES,
2520 		.procname	=	"mtu_expires",
2521 		.data		=	&ip6_rt_mtu_expires,
2522 		.maxlen		=	sizeof(int),
2523 		.mode		=	0644,
2524 		.proc_handler	=	&proc_dointvec_jiffies,
2525 		.strategy	=	&sysctl_jiffies,
2526 	},
2527 	{
2528 		.ctl_name	=	NET_IPV6_ROUTE_MIN_ADVMSS,
2529 		.procname	=	"min_adv_mss",
2530 		.data		=	&ip6_rt_min_advmss,
2531 		.maxlen		=	sizeof(int),
2532 		.mode		=	0644,
2533 		.proc_handler	=	&proc_dointvec_jiffies,
2534 		.strategy	=	&sysctl_jiffies,
2535 	},
2536 	{
2537 		.ctl_name	=	NET_IPV6_ROUTE_GC_MIN_INTERVAL_MS,
2538 		.procname	=	"gc_min_interval_ms",
2539 		.data		=	&ip6_rt_gc_min_interval,
2540 		.maxlen		=	sizeof(int),
2541 		.mode		=	0644,
2542 		.proc_handler	=	&proc_dointvec_ms_jiffies,
2543 		.strategy	=	&sysctl_ms_jiffies,
2544 	},
2545 	{ .ctl_name = 0 }
2546 };
2547 
2548 #endif
2549 
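/*
 * Boot-time initialisation: create the rt6_info dst cache, bring up the
 * FIB, register the /proc entries, optional xfrm and policy-rule hooks,
 * and the RTM_NEWROUTE/DELROUTE/GETROUTE rtnetlink handlers.
 */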
2550 void __init ip6_route_init(void)
2551 {
2552 #ifdef 	CONFIG_PROC_FS
2553 	struct proc_dir_entry *p;
2554 #endif
2555 	ip6_dst_ops.kmem_cachep =
2556 		kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
2557 				  SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);
2558 	ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops.kmem_cachep;
2559 
2560 	fib6_init();
2561 #ifdef 	CONFIG_PROC_FS
2562 	p = proc_net_create(&init_net, "ipv6_route", 0, rt6_proc_info);
2563 	if (p)
2564 		p->owner = THIS_MODULE;
2565 
2566 	proc_net_fops_create(&init_net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2567 #endif
2568 #ifdef CONFIG_XFRM
2569 	xfrm6_init();
2570 #endif
2571 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2572 	fib6_rules_init();
2573 #endif
2574 
2575 	__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL);
2576 	__rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL);
2577 	__rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL);
2578 }
2579 
2580 void ip6_route_cleanup(void)
2581 {
2582 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2583 	fib6_rules_cleanup();
2584 #endif
2585 #ifdef CONFIG_PROC_FS
2586 	proc_net_remove(&init_net, "ipv6_route");
2587 	proc_net_remove(&init_net, "rt6_stats");
2588 #endif
2589 #ifdef CONFIG_XFRM
2590 	xfrm6_fini();
2591 #endif
2592 	rt6_ifdown(NULL);
2593 	fib6_gc_cleanup();
2594 	kmem_cache_destroy(ip6_dst_ops.kmem_cachep);
2595 }
2596