xref: /linux/net/ipv6/route.c (revision 42fda66387daa53538ae13a2c858396aaf037158)
1 /*
2  *	Linux INET6 implementation
3  *	FIB front-end.
4  *
5  *	Authors:
6  *	Pedro Roque		<roque@di.fc.ul.pt>
7  *
8  *	$Id: route.c,v 1.56 2001/10/31 21:55:55 davem Exp $
9  *
10  *	This program is free software; you can redistribute it and/or
11  *      modify it under the terms of the GNU General Public License
12  *      as published by the Free Software Foundation; either version
13  *      2 of the License, or (at your option) any later version.
14  */
15 
16 /*	Changes:
17  *
18  *	YOSHIFUJI Hideaki @USAGI
19  *		reworked default router selection.
20  *		- respect outgoing interface
21  *		- select from (probably) reachable routers (i.e.
22  *		routers in REACHABLE, STALE, DELAY or PROBE states).
23  *		- always select the same router if it is (probably)
24  *		reachable.  otherwise, round-robin the list.
25  *	Ville Nuorvala
26  *		Fixed routing subtrees.
27  */
28 
29 #include <linux/capability.h>
30 #include <linux/errno.h>
31 #include <linux/types.h>
32 #include <linux/times.h>
33 #include <linux/socket.h>
34 #include <linux/sockios.h>
35 #include <linux/net.h>
36 #include <linux/route.h>
37 #include <linux/netdevice.h>
38 #include <linux/in6.h>
39 #include <linux/init.h>
40 #include <linux/if_arp.h>
41 
42 #ifdef 	CONFIG_PROC_FS
43 #include <linux/proc_fs.h>
44 #include <linux/seq_file.h>
45 #endif
46 
47 #include <net/net_namespace.h>
48 #include <net/snmp.h>
49 #include <net/ipv6.h>
50 #include <net/ip6_fib.h>
51 #include <net/ip6_route.h>
52 #include <net/ndisc.h>
53 #include <net/addrconf.h>
54 #include <net/tcp.h>
55 #include <linux/rtnetlink.h>
56 #include <net/dst.h>
57 #include <net/xfrm.h>
58 #include <net/netevent.h>
59 #include <net/netlink.h>
60 
61 #include <asm/uaccess.h>
62 
63 #ifdef CONFIG_SYSCTL
64 #include <linux/sysctl.h>
65 #endif
66 
67 /* Set to 3 to get tracing. */
68 #define RT6_DEBUG 2
69 
70 #if RT6_DEBUG >= 3
71 #define RDBG(x) printk x
72 #define RT6_TRACE(x...) printk(KERN_DEBUG x)
73 #else
74 #define RDBG(x)
75 #define RT6_TRACE(x...) do { ; } while (0)
76 #endif
77 
78 #define CLONE_OFFLINK_ROUTE 0
79 
80 static int ip6_rt_max_size = 4096;
81 static int ip6_rt_gc_min_interval = HZ / 2;
82 static int ip6_rt_gc_timeout = 60*HZ;
83 int ip6_rt_gc_interval = 30*HZ;
84 static int ip6_rt_gc_elasticity = 9;
85 static int ip6_rt_mtu_expires = 10*60*HZ;
86 static int ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
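
/*
 * For illustration, with the defaults above (times in jiffies, HZ per
 * second): a forced garbage collection runs at most once every half
 * second (ip6_rt_gc_min_interval = HZ/2) unless the cache exceeds
 * ip6_rt_max_size, cached entries time out after 60 seconds
 * (ip6_rt_gc_timeout), learned PMTU values expire after 10 minutes
 * (ip6_rt_mtu_expires), and the minimum advertised MSS works out to
 * IPV6_MIN_MTU (1280) - 20 - 40 = 1220 bytes.
 */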
87 
88 static struct rt6_info * ip6_rt_copy(struct rt6_info *ort);
89 static struct dst_entry	*ip6_dst_check(struct dst_entry *dst, u32 cookie);
90 static struct dst_entry *ip6_negative_advice(struct dst_entry *);
91 static void		ip6_dst_destroy(struct dst_entry *);
92 static void		ip6_dst_ifdown(struct dst_entry *,
93 				       struct net_device *dev, int how);
94 static int		 ip6_dst_gc(void);
95 
96 static int		ip6_pkt_discard(struct sk_buff *skb);
97 static int		ip6_pkt_discard_out(struct sk_buff *skb);
98 static void		ip6_link_failure(struct sk_buff *skb);
99 static void		ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
100 
101 #ifdef CONFIG_IPV6_ROUTE_INFO
102 static struct rt6_info *rt6_add_route_info(struct in6_addr *prefix, int prefixlen,
103 					   struct in6_addr *gwaddr, int ifindex,
104 					   unsigned pref);
105 static struct rt6_info *rt6_get_route_info(struct in6_addr *prefix, int prefixlen,
106 					   struct in6_addr *gwaddr, int ifindex);
107 #endif
108 
109 static struct dst_ops ip6_dst_ops = {
110 	.family			=	AF_INET6,
111 	.protocol		=	__constant_htons(ETH_P_IPV6),
112 	.gc			=	ip6_dst_gc,
113 	.gc_thresh		=	1024,
114 	.check			=	ip6_dst_check,
115 	.destroy		=	ip6_dst_destroy,
116 	.ifdown			=	ip6_dst_ifdown,
117 	.negative_advice	=	ip6_negative_advice,
118 	.link_failure		=	ip6_link_failure,
119 	.update_pmtu		=	ip6_rt_update_pmtu,
120 	.entry_size		=	sizeof(struct rt6_info),
121 };
122 
123 static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu)
124 {
125 }
126 
127 static struct dst_ops ip6_dst_blackhole_ops = {
128 	.family			=	AF_INET6,
129 	.protocol		=	__constant_htons(ETH_P_IPV6),
130 	.destroy		=	ip6_dst_destroy,
131 	.check			=	ip6_dst_check,
132 	.update_pmtu		=	ip6_rt_blackhole_update_pmtu,
133 	.entry_size		=	sizeof(struct rt6_info),
134 };
135 
136 struct rt6_info ip6_null_entry = {
137 	.u = {
138 		.dst = {
139 			.__refcnt	= ATOMIC_INIT(1),
140 			.__use		= 1,
141 			.obsolete	= -1,
142 			.error		= -ENETUNREACH,
143 			.metrics	= { [RTAX_HOPLIMIT - 1] = 255, },
144 			.input		= ip6_pkt_discard,
145 			.output		= ip6_pkt_discard_out,
146 			.ops		= &ip6_dst_ops,
147 			.path		= (struct dst_entry*)&ip6_null_entry,
148 		}
149 	},
150 	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
151 	.rt6i_metric	= ~(u32) 0,
152 	.rt6i_ref	= ATOMIC_INIT(1),
153 };
154 
155 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
156 
157 static int ip6_pkt_prohibit(struct sk_buff *skb);
158 static int ip6_pkt_prohibit_out(struct sk_buff *skb);
159 static int ip6_pkt_blk_hole(struct sk_buff *skb);
160 
161 struct rt6_info ip6_prohibit_entry = {
162 	.u = {
163 		.dst = {
164 			.__refcnt	= ATOMIC_INIT(1),
165 			.__use		= 1,
166 			.obsolete	= -1,
167 			.error		= -EACCES,
168 			.metrics	= { [RTAX_HOPLIMIT - 1] = 255, },
169 			.input		= ip6_pkt_prohibit,
170 			.output		= ip6_pkt_prohibit_out,
171 			.ops		= &ip6_dst_ops,
172 			.path		= (struct dst_entry*)&ip6_prohibit_entry,
173 		}
174 	},
175 	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
176 	.rt6i_metric	= ~(u32) 0,
177 	.rt6i_ref	= ATOMIC_INIT(1),
178 };
179 
180 struct rt6_info ip6_blk_hole_entry = {
181 	.u = {
182 		.dst = {
183 			.__refcnt	= ATOMIC_INIT(1),
184 			.__use		= 1,
185 			.obsolete	= -1,
186 			.error		= -EINVAL,
187 			.metrics	= { [RTAX_HOPLIMIT - 1] = 255, },
188 			.input		= ip6_pkt_blk_hole,
189 			.output		= ip6_pkt_blk_hole,
190 			.ops		= &ip6_dst_ops,
191 			.path		= (struct dst_entry*)&ip6_blk_hole_entry,
192 		}
193 	},
194 	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
195 	.rt6i_metric	= ~(u32) 0,
196 	.rt6i_ref	= ATOMIC_INIT(1),
197 };
198 
199 #endif
200 
201 /* allocate dst with ip6_dst_ops */
202 static __inline__ struct rt6_info *ip6_dst_alloc(void)
203 {
204 	return (struct rt6_info *)dst_alloc(&ip6_dst_ops);
205 }
206 
207 static void ip6_dst_destroy(struct dst_entry *dst)
208 {
209 	struct rt6_info *rt = (struct rt6_info *)dst;
210 	struct inet6_dev *idev = rt->rt6i_idev;
211 
212 	if (idev != NULL) {
213 		rt->rt6i_idev = NULL;
214 		in6_dev_put(idev);
215 	}
216 }
217 
218 static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
219 			   int how)
220 {
221 	struct rt6_info *rt = (struct rt6_info *)dst;
222 	struct inet6_dev *idev = rt->rt6i_idev;
223 
224 	if (dev != init_net.loopback_dev && idev != NULL && idev->dev == dev) {
225 		struct inet6_dev *loopback_idev = in6_dev_get(init_net.loopback_dev);
226 		if (loopback_idev != NULL) {
227 			rt->rt6i_idev = loopback_idev;
228 			in6_dev_put(idev);
229 		}
230 	}
231 }
232 
233 static __inline__ int rt6_check_expired(const struct rt6_info *rt)
234 {
235 	return (rt->rt6i_flags & RTF_EXPIRES &&
236 		time_after(jiffies, rt->rt6i_expires));
237 }
238 
239 static inline int rt6_need_strict(struct in6_addr *daddr)
240 {
241 	return (ipv6_addr_type(daddr) &
242 		(IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL));
243 }
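
/*
 * Example: rt6_need_strict() is true for multicast (e.g. ff02::1) and
 * link-local (e.g. fe80::1) destinations, whose meaning depends on the
 * interface; callers then add RT6_LOOKUP_F_IFACE so that the lookup
 * honours the requested device.
 */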
244 
245 /*
246  *	Route lookup. Any table->tb6_lock is implied.
247  */
248 
249 static __inline__ struct rt6_info *rt6_device_match(struct rt6_info *rt,
250 						    int oif,
251 						    int strict)
252 {
253 	struct rt6_info *local = NULL;
254 	struct rt6_info *sprt;
255 
256 	if (oif) {
257 		for (sprt = rt; sprt; sprt = sprt->u.dst.rt6_next) {
258 			struct net_device *dev = sprt->rt6i_dev;
259 			if (dev->ifindex == oif)
260 				return sprt;
261 			if (dev->flags & IFF_LOOPBACK) {
262 				if (sprt->rt6i_idev == NULL ||
263 				    sprt->rt6i_idev->dev->ifindex != oif) {
264 					if (strict && oif)
265 						continue;
266 					if (local && (!oif ||
267 						      local->rt6i_idev->dev->ifindex == oif))
268 						continue;
269 				}
270 				local = sprt;
271 			}
272 		}
273 
274 		if (local)
275 			return local;
276 
277 		if (strict)
278 			return &ip6_null_entry;
279 	}
280 	return rt;
281 }
282 
283 #ifdef CONFIG_IPV6_ROUTER_PREF
284 static void rt6_probe(struct rt6_info *rt)
285 {
286 	struct neighbour *neigh = rt ? rt->rt6i_nexthop : NULL;
287 	/*
288 	 * The neighbour entry does not look usable right now; however,
289 	 * we need to check whether that is really so, i.e. perform
290 	 * Router Reachability Probing.
291 	 *
292 	 * Router Reachability Probe MUST be rate-limited
293 	 * to no more than one per minute.
294 	 */
295 	if (!neigh || (neigh->nud_state & NUD_VALID))
296 		return;
297 	read_lock_bh(&neigh->lock);
298 	if (!(neigh->nud_state & NUD_VALID) &&
299 	    time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
300 		struct in6_addr mcaddr;
301 		struct in6_addr *target;
302 
303 		neigh->updated = jiffies;
304 		read_unlock_bh(&neigh->lock);
305 
306 		target = (struct in6_addr *)&neigh->primary_key;
307 		addrconf_addr_solict_mult(target, &mcaddr);
308 		ndisc_send_ns(rt->rt6i_dev, NULL, target, &mcaddr, NULL);
309 	} else
310 		read_unlock_bh(&neigh->lock);
311 }
312 #else
313 static inline void rt6_probe(struct rt6_info *rt)
314 {
315 	return;
316 }
317 #endif
318 
319 /*
320  * Default Router Selection (RFC 2461 6.3.6)
321  */
322 static inline int rt6_check_dev(struct rt6_info *rt, int oif)
323 {
324 	struct net_device *dev = rt->rt6i_dev;
325 	if (!oif || dev->ifindex == oif)
326 		return 2;
327 	if ((dev->flags & IFF_LOOPBACK) &&
328 	    rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
329 		return 1;
330 	return 0;
331 }
332 
333 static inline int rt6_check_neigh(struct rt6_info *rt)
334 {
335 	struct neighbour *neigh = rt->rt6i_nexthop;
336 	int m = 0;
337 	if (rt->rt6i_flags & RTF_NONEXTHOP ||
338 	    !(rt->rt6i_flags & RTF_GATEWAY))
339 		m = 1;
340 	else if (neigh) {
341 		read_lock_bh(&neigh->lock);
342 		if (neigh->nud_state & NUD_VALID)
343 			m = 2;
344 		else if (!(neigh->nud_state & NUD_FAILED))
345 			m = 1;
346 		read_unlock_bh(&neigh->lock);
347 	}
348 	return m;
349 }
350 
351 static int rt6_score_route(struct rt6_info *rt, int oif,
352 			   int strict)
353 {
354 	int m, n;
355 
356 	m = rt6_check_dev(rt, oif);
357 	if (!m && (strict & RT6_LOOKUP_F_IFACE))
358 		return -1;
359 #ifdef CONFIG_IPV6_ROUTER_PREF
360 	m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
361 #endif
362 	n = rt6_check_neigh(rt);
363 	if (!n && (strict & RT6_LOOKUP_F_REACHABLE))
364 		return -1;
365 	return m;
366 }
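
/*
 * Rough sketch of how the score composes: rt6_check_dev() contributes
 * 0-2 in the low bits (2 for a route out of the requested interface,
 * 1 for a matching local/loopback route), and with
 * CONFIG_IPV6_ROUTER_PREF the decoded router preference (larger for
 * more preferred routers) sits in the bits above it.  A route whose
 * next hop fails rt6_check_neigh() is rejected outright when the
 * caller asked for RT6_LOOKUP_F_REACHABLE.
 */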
367 
368 static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
369 				   int *mpri, struct rt6_info *match)
370 {
371 	int m;
372 
373 	if (rt6_check_expired(rt))
374 		goto out;
375 
376 	m = rt6_score_route(rt, oif, strict);
377 	if (m < 0)
378 		goto out;
379 
380 	if (m > *mpri) {
381 		if (strict & RT6_LOOKUP_F_REACHABLE)
382 			rt6_probe(match);
383 		*mpri = m;
384 		match = rt;
385 	} else if (strict & RT6_LOOKUP_F_REACHABLE) {
386 		rt6_probe(rt);
387 	}
388 
389 out:
390 	return match;
391 }
392 
393 static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
394 				     struct rt6_info *rr_head,
395 				     u32 metric, int oif, int strict)
396 {
397 	struct rt6_info *rt, *match;
398 	int mpri = -1;
399 
400 	match = NULL;
401 	for (rt = rr_head; rt && rt->rt6i_metric == metric;
402 	     rt = rt->u.dst.rt6_next)
403 		match = find_match(rt, oif, strict, &mpri, match);
404 	for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
405 	     rt = rt->u.dst.rt6_next)
406 		match = find_match(rt, oif, strict, &mpri, match);
407 
408 	return match;
409 }
410 
411 static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
412 {
413 	struct rt6_info *match, *rt0;
414 
415 	RT6_TRACE("%s(fn->leaf=%p, oif=%d)\n",
416 		  __FUNCTION__, fn->leaf, oif);
417 
418 	rt0 = fn->rr_ptr;
419 	if (!rt0)
420 		fn->rr_ptr = rt0 = fn->leaf;
421 
422 	match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict);
423 
424 	if (!match &&
425 	    (strict & RT6_LOOKUP_F_REACHABLE)) {
426 		struct rt6_info *next = rt0->u.dst.rt6_next;
427 
428 		/* no entries matched; do round-robin */
429 		if (!next || next->rt6i_metric != rt0->rt6i_metric)
430 			next = fn->leaf;
431 
432 		if (next != rt0)
433 			fn->rr_ptr = next;
434 	}
435 
436 	RT6_TRACE("%s() => %p\n",
437 		  __FUNCTION__, match);
438 
439 	return (match ? match : &ip6_null_entry);
440 }
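
/*
 * Note on the round-robin above: if none of the equal-metric routes at
 * this node is (probably) reachable, fn->rr_ptr is advanced to the next
 * route of the same metric, so the next lookup starts from a different
 * router instead of repeatedly picking the same unreachable one.
 */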
441 
442 #ifdef CONFIG_IPV6_ROUTE_INFO
443 int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
444 		  struct in6_addr *gwaddr)
445 {
446 	struct route_info *rinfo = (struct route_info *) opt;
447 	struct in6_addr prefix_buf, *prefix;
448 	unsigned int pref;
449 	u32 lifetime;
450 	struct rt6_info *rt;
451 
452 	if (len < sizeof(struct route_info)) {
453 		return -EINVAL;
454 	}
455 
456 	/* Sanity check for prefix_len and length */
457 	if (rinfo->length > 3) {
458 		return -EINVAL;
459 	} else if (rinfo->prefix_len > 128) {
460 		return -EINVAL;
461 	} else if (rinfo->prefix_len > 64) {
462 		if (rinfo->length < 2) {
463 			return -EINVAL;
464 		}
465 	} else if (rinfo->prefix_len > 0) {
466 		if (rinfo->length < 1) {
467 			return -EINVAL;
468 		}
469 	}
470 
471 	pref = rinfo->route_pref;
472 	if (pref == ICMPV6_ROUTER_PREF_INVALID)
473 		pref = ICMPV6_ROUTER_PREF_MEDIUM;
474 
475 	lifetime = ntohl(rinfo->lifetime);
476 	if (lifetime == 0xffffffff) {
477 		/* infinity */
478 	} else if (lifetime > 0x7fffffff/HZ) {
479 		/* Avoid arithmetic overflow */
480 		lifetime = 0x7fffffff/HZ - 1;
481 	}
482 
483 	if (rinfo->length == 3)
484 		prefix = (struct in6_addr *)rinfo->prefix;
485 	else {
486 		/* this function is safe */
487 		ipv6_addr_prefix(&prefix_buf,
488 				 (struct in6_addr *)rinfo->prefix,
489 				 rinfo->prefix_len);
490 		prefix = &prefix_buf;
491 	}
492 
493 	rt = rt6_get_route_info(prefix, rinfo->prefix_len, gwaddr, dev->ifindex);
494 
495 	if (rt && !lifetime) {
496 		ip6_del_rt(rt);
497 		rt = NULL;
498 	}
499 
500 	if (!rt && lifetime)
501 		rt = rt6_add_route_info(prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
502 					pref);
503 	else if (rt)
504 		rt->rt6i_flags = RTF_ROUTEINFO |
505 				 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
506 
507 	if (rt) {
508 		if (lifetime == 0xffffffff) {
509 			rt->rt6i_flags &= ~RTF_EXPIRES;
510 		} else {
511 			rt->rt6i_expires = jiffies + HZ * lifetime;
512 			rt->rt6i_flags |= RTF_EXPIRES;
513 		}
514 		dst_release(&rt->u.dst);
515 	}
516 	return 0;
517 }
518 #endif
519 
520 #define BACKTRACK(saddr) \
521 do { \
522 	if (rt == &ip6_null_entry) { \
523 		struct fib6_node *pn; \
524 		while (1) { \
525 			if (fn->fn_flags & RTN_TL_ROOT) \
526 				goto out; \
527 			pn = fn->parent; \
528 			if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn) \
529 				fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr); \
530 			else \
531 				fn = pn; \
532 			if (fn->fn_flags & RTN_RTINFO) \
533 				goto restart; \
534 		} \
535 	} \
536 } while(0)
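
/*
 * Usage note: BACKTRACK() is not self-contained; it relies on the
 * caller providing the local variables 'fn' and 'rt', a 'restart' label
 * that retries the lookup one level up the tree, and an 'out' label
 * reached once the tree root is hit.  ip6_pol_route_lookup(),
 * ip6_pol_route() and __ip6_route_redirect() below all follow this
 * pattern.
 */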
537 
538 static struct rt6_info *ip6_pol_route_lookup(struct fib6_table *table,
539 					     struct flowi *fl, int flags)
540 {
541 	struct fib6_node *fn;
542 	struct rt6_info *rt;
543 
544 	read_lock_bh(&table->tb6_lock);
545 	fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
546 restart:
547 	rt = fn->leaf;
548 	rt = rt6_device_match(rt, fl->oif, flags);
549 	BACKTRACK(&fl->fl6_src);
550 out:
551 	dst_hold(&rt->u.dst);
552 	read_unlock_bh(&table->tb6_lock);
553 
554 	rt->u.dst.lastuse = jiffies;
555 	rt->u.dst.__use++;
556 
557 	return rt;
558 
559 }
560 
561 struct rt6_info *rt6_lookup(struct in6_addr *daddr, struct in6_addr *saddr,
562 			    int oif, int strict)
563 {
564 	struct flowi fl = {
565 		.oif = oif,
566 		.nl_u = {
567 			.ip6_u = {
568 				.daddr = *daddr,
569 			},
570 		},
571 	};
572 	struct dst_entry *dst;
573 	int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
574 
575 	if (saddr) {
576 		memcpy(&fl.fl6_src, saddr, sizeof(*saddr));
577 		flags |= RT6_LOOKUP_F_HAS_SADDR;
578 	}
579 
580 	dst = fib6_rule_lookup(&fl, flags, ip6_pol_route_lookup);
581 	if (dst->error == 0)
582 		return (struct rt6_info *) dst;
583 
584 	dst_release(dst);
585 
586 	return NULL;
587 }
588 
589 EXPORT_SYMBOL(rt6_lookup);
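
/*
 * Usage sketch (illustrative only): a caller with a known destination
 * might do
 *
 *	struct rt6_info *rt = rt6_lookup(&daddr, NULL, skb->dev->ifindex, 0);
 *	if (rt) {
 *		... inspect rt->rt6i_dev, rt->rt6i_gateway, ...
 *		dst_release(&rt->u.dst);
 *	}
 *
 * The entry is returned with a reference held, so it must be dropped
 * with dst_release() once the caller is done with it.
 */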
590 
591 /* ip6_ins_rt is called without table->tb6_lock held.
592    It takes a new route entry; if the addition fails for any reason
593    the route is freed. In any case, if the caller does not hold a
594    reference of its own, the entry may be destroyed.
595  */
596 
597 static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
598 {
599 	int err;
600 	struct fib6_table *table;
601 
602 	table = rt->rt6i_table;
603 	write_lock_bh(&table->tb6_lock);
604 	err = fib6_add(&table->tb6_root, rt, info);
605 	write_unlock_bh(&table->tb6_lock);
606 
607 	return err;
608 }
609 
610 int ip6_ins_rt(struct rt6_info *rt)
611 {
612 	return __ip6_ins_rt(rt, NULL);
613 }
614 
615 static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *daddr,
616 				      struct in6_addr *saddr)
617 {
618 	struct rt6_info *rt;
619 
620 	/*
621 	 *	Clone the route.
622 	 */
623 
624 	rt = ip6_rt_copy(ort);
625 
626 	if (rt) {
627 		if (!(rt->rt6i_flags&RTF_GATEWAY)) {
628 			if (rt->rt6i_dst.plen != 128 &&
629 			    ipv6_addr_equal(&rt->rt6i_dst.addr, daddr))
630 				rt->rt6i_flags |= RTF_ANYCAST;
631 			ipv6_addr_copy(&rt->rt6i_gateway, daddr);
632 		}
633 
634 		ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
635 		rt->rt6i_dst.plen = 128;
636 		rt->rt6i_flags |= RTF_CACHE;
637 		rt->u.dst.flags |= DST_HOST;
638 
639 #ifdef CONFIG_IPV6_SUBTREES
640 		if (rt->rt6i_src.plen && saddr) {
641 			ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
642 			rt->rt6i_src.plen = 128;
643 		}
644 #endif
645 
646 		rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
647 
648 	}
649 
650 	return rt;
651 }
652 
653 static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, struct in6_addr *daddr)
654 {
655 	struct rt6_info *rt = ip6_rt_copy(ort);
656 	if (rt) {
657 		ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
658 		rt->rt6i_dst.plen = 128;
659 		rt->rt6i_flags |= RTF_CACHE;
660 		rt->u.dst.flags |= DST_HOST;
661 		rt->rt6i_nexthop = neigh_clone(ort->rt6i_nexthop);
662 	}
663 	return rt;
664 }
665 
666 static struct rt6_info *ip6_pol_route(struct fib6_table *table, int oif,
667 					    struct flowi *fl, int flags)
668 {
669 	struct fib6_node *fn;
670 	struct rt6_info *rt, *nrt;
671 	int strict = 0;
672 	int attempts = 3;
673 	int err;
674 	int reachable = ipv6_devconf.forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
675 
676 	strict |= flags & RT6_LOOKUP_F_IFACE;
677 
678 relookup:
679 	read_lock_bh(&table->tb6_lock);
680 
681 restart_2:
682 	fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
683 
684 restart:
685 	rt = rt6_select(fn, oif, strict | reachable);
686 	BACKTRACK(&fl->fl6_src);
687 	if (rt == &ip6_null_entry ||
688 	    rt->rt6i_flags & RTF_CACHE)
689 		goto out;
690 
691 	dst_hold(&rt->u.dst);
692 	read_unlock_bh(&table->tb6_lock);
693 
694 	if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
695 		nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src);
696 	else {
697 #if CLONE_OFFLINK_ROUTE
698 		nrt = rt6_alloc_clone(rt, &fl->fl6_dst);
699 #else
700 		goto out2;
701 #endif
702 	}
703 
704 	dst_release(&rt->u.dst);
705 	rt = nrt ? : &ip6_null_entry;
706 
707 	dst_hold(&rt->u.dst);
708 	if (nrt) {
709 		err = ip6_ins_rt(nrt);
710 		if (!err)
711 			goto out2;
712 	}
713 
714 	if (--attempts <= 0)
715 		goto out2;
716 
717 	/*
718 	 * Race condition! In the gap while table->tb6_lock was
719 	 * released, someone could have inserted this route.  Relookup.
720 	 */
721 	dst_release(&rt->u.dst);
722 	goto relookup;
723 
724 out:
725 	if (reachable) {
726 		reachable = 0;
727 		goto restart_2;
728 	}
729 	dst_hold(&rt->u.dst);
730 	read_unlock_bh(&table->tb6_lock);
731 out2:
732 	rt->u.dst.lastuse = jiffies;
733 	rt->u.dst.__use++;
734 
735 	return rt;
736 }
737 
738 static struct rt6_info *ip6_pol_route_input(struct fib6_table *table,
739 					    struct flowi *fl, int flags)
740 {
741 	return ip6_pol_route(table, fl->iif, fl, flags);
742 }
743 
744 void ip6_route_input(struct sk_buff *skb)
745 {
746 	struct ipv6hdr *iph = ipv6_hdr(skb);
747 	int flags = RT6_LOOKUP_F_HAS_SADDR;
748 	struct flowi fl = {
749 		.iif = skb->dev->ifindex,
750 		.nl_u = {
751 			.ip6_u = {
752 				.daddr = iph->daddr,
753 				.saddr = iph->saddr,
754 				.flowlabel = (* (__be32 *) iph)&IPV6_FLOWINFO_MASK,
755 			},
756 		},
757 		.mark = skb->mark,
758 		.proto = iph->nexthdr,
759 	};
760 
761 	if (rt6_need_strict(&iph->daddr))
762 		flags |= RT6_LOOKUP_F_IFACE;
763 
764 	skb->dst = fib6_rule_lookup(&fl, flags, ip6_pol_route_input);
765 }
766 
767 static struct rt6_info *ip6_pol_route_output(struct fib6_table *table,
768 					     struct flowi *fl, int flags)
769 {
770 	return ip6_pol_route(table, fl->oif, fl, flags);
771 }
772 
773 struct dst_entry * ip6_route_output(struct sock *sk, struct flowi *fl)
774 {
775 	int flags = 0;
776 
777 	if (rt6_need_strict(&fl->fl6_dst))
778 		flags |= RT6_LOOKUP_F_IFACE;
779 
780 	if (!ipv6_addr_any(&fl->fl6_src))
781 		flags |= RT6_LOOKUP_F_HAS_SADDR;
782 
783 	return fib6_rule_lookup(fl, flags, ip6_pol_route_output);
784 }
785 
786 EXPORT_SYMBOL(ip6_route_output);
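
/*
 * Usage sketch (illustrative only): callers fill in a flow descriptor
 * and always get a dst_entry back; on failure it is the null entry
 * with dst->error set, so the error is what must be checked:
 *
 *	struct flowi fl = { .oif = sk ? sk->sk_bound_dev_if : 0 };
 *	int err;
 *
 *	ipv6_addr_copy(&fl.fl6_dst, &daddr);
 *	dst = ip6_route_output(sk, &fl);
 *	err = dst->error;
 *	if (err) {
 *		dst_release(dst);
 *		return err;
 *	}
 */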
787 
788 static int ip6_blackhole_output(struct sk_buff *skb)
789 {
790 	kfree_skb(skb);
791 	return 0;
792 }
793 
794 int ip6_dst_blackhole(struct sock *sk, struct dst_entry **dstp, struct flowi *fl)
795 {
796 	struct rt6_info *ort = (struct rt6_info *) *dstp;
797 	struct rt6_info *rt = (struct rt6_info *)
798 		dst_alloc(&ip6_dst_blackhole_ops);
799 	struct dst_entry *new = NULL;
800 
801 	if (rt) {
802 		new = &rt->u.dst;
803 
804 		atomic_set(&new->__refcnt, 1);
805 		new->__use = 1;
806 		new->input = ip6_blackhole_output;
807 		new->output = ip6_blackhole_output;
808 
809 		memcpy(new->metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
810 		new->dev = ort->u.dst.dev;
811 		if (new->dev)
812 			dev_hold(new->dev);
813 		rt->rt6i_idev = ort->rt6i_idev;
814 		if (rt->rt6i_idev)
815 			in6_dev_hold(rt->rt6i_idev);
816 		rt->rt6i_expires = 0;
817 
818 		ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
819 		rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
820 		rt->rt6i_metric = 0;
821 
822 		memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
823 #ifdef CONFIG_IPV6_SUBTREES
824 		memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
825 #endif
826 
827 		dst_free(new);
828 	}
829 
830 	dst_release(*dstp);
831 	*dstp = new;
832 	return (new ? 0 : -ENOMEM);
833 }
834 EXPORT_SYMBOL_GPL(ip6_dst_blackhole);
835 
836 /*
837  *	Destination cache support functions
838  */
839 
840 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
841 {
842 	struct rt6_info *rt;
843 
844 	rt = (struct rt6_info *) dst;
845 
846 	if (rt && rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
847 		return dst;
848 
849 	return NULL;
850 }
851 
852 static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
853 {
854 	struct rt6_info *rt = (struct rt6_info *) dst;
855 
856 	if (rt) {
857 		if (rt->rt6i_flags & RTF_CACHE)
858 			ip6_del_rt(rt);
859 		else
860 			dst_release(dst);
861 	}
862 	return NULL;
863 }
864 
865 static void ip6_link_failure(struct sk_buff *skb)
866 {
867 	struct rt6_info *rt;
868 
869 	icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0, skb->dev);
870 
871 	rt = (struct rt6_info *) skb->dst;
872 	if (rt) {
873 		if (rt->rt6i_flags&RTF_CACHE) {
874 			dst_set_expires(&rt->u.dst, 0);
875 			rt->rt6i_flags |= RTF_EXPIRES;
876 		} else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
877 			rt->rt6i_node->fn_sernum = -1;
878 	}
879 }
880 
881 static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
882 {
883 	struct rt6_info *rt6 = (struct rt6_info*)dst;
884 
885 	if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
886 		rt6->rt6i_flags |= RTF_MODIFIED;
887 		if (mtu < IPV6_MIN_MTU) {
888 			mtu = IPV6_MIN_MTU;
889 			dst->metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
890 		}
891 		dst->metrics[RTAX_MTU-1] = mtu;
892 		call_netevent_notifiers(NETEVENT_PMTU_UPDATE, dst);
893 	}
894 }
895 
896 static int ipv6_get_mtu(struct net_device *dev);
897 
898 static inline unsigned int ipv6_advmss(unsigned int mtu)
899 {
900 	mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
901 
902 	if (mtu < ip6_rt_min_advmss)
903 		mtu = ip6_rt_min_advmss;
904 
905 	/*
906 	 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
907 	 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
908 	 * IPV6_MAXPLEN is also valid and means: "any MSS,
909 	 * rely only on pmtu discovery"
910 	 */
911 	if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
912 		mtu = IPV6_MAXPLEN;
913 	return mtu;
914 }
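
/*
 * Worked example: on a standard Ethernet link with an MTU of 1500 the
 * advertised MSS becomes 1500 - 40 (IPv6 header) - 20 (TCP header) =
 * 1440 bytes; the floor is ip6_rt_min_advmss = 1280 - 60 = 1220 bytes.
 */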
915 
916 static struct dst_entry *ndisc_dst_gc_list;
917 static DEFINE_SPINLOCK(ndisc_lock);
918 
919 struct dst_entry *ndisc_dst_alloc(struct net_device *dev,
920 				  struct neighbour *neigh,
921 				  struct in6_addr *addr,
922 				  int (*output)(struct sk_buff *))
923 {
924 	struct rt6_info *rt;
925 	struct inet6_dev *idev = in6_dev_get(dev);
926 
927 	if (unlikely(idev == NULL))
928 		return NULL;
929 
930 	rt = ip6_dst_alloc();
931 	if (unlikely(rt == NULL)) {
932 		in6_dev_put(idev);
933 		goto out;
934 	}
935 
936 	dev_hold(dev);
937 	if (neigh)
938 		neigh_hold(neigh);
939 	else
940 		neigh = ndisc_get_neigh(dev, addr);
941 
942 	rt->rt6i_dev	  = dev;
943 	rt->rt6i_idev     = idev;
944 	rt->rt6i_nexthop  = neigh;
945 	atomic_set(&rt->u.dst.__refcnt, 1);
946 	rt->u.dst.metrics[RTAX_HOPLIMIT-1] = 255;
947 	rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
948 	rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
949 	rt->u.dst.output  = output;
950 
951 #if 0	/* there's no chance to use these for ndisc */
952 	rt->u.dst.flags   = ipv6_addr_type(addr) & IPV6_ADDR_UNICAST
953 				? DST_HOST
954 				: 0;
955 	ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
956 	rt->rt6i_dst.plen = 128;
957 #endif
958 
959 	spin_lock_bh(&ndisc_lock);
960 	rt->u.dst.next = ndisc_dst_gc_list;
961 	ndisc_dst_gc_list = &rt->u.dst;
962 	spin_unlock_bh(&ndisc_lock);
963 
964 	fib6_force_start_gc();
965 
966 out:
967 	return &rt->u.dst;
968 }
969 
970 int ndisc_dst_gc(int *more)
971 {
972 	struct dst_entry *dst, *next, **pprev;
973 	int freed;
974 
975 	next = NULL;
976 	freed = 0;
977 
978 	spin_lock_bh(&ndisc_lock);
979 	pprev = &ndisc_dst_gc_list;
980 
981 	while ((dst = *pprev) != NULL) {
982 		if (!atomic_read(&dst->__refcnt)) {
983 			*pprev = dst->next;
984 			dst_free(dst);
985 			freed++;
986 		} else {
987 			pprev = &dst->next;
988 			(*more)++;
989 		}
990 	}
991 
992 	spin_unlock_bh(&ndisc_lock);
993 
994 	return freed;
995 }
996 
997 static int ip6_dst_gc(void)
998 {
999 	static unsigned expire = 30*HZ;
1000 	static unsigned long last_gc;
1001 	unsigned long now = jiffies;
1002 
1003 	if (time_after(last_gc + ip6_rt_gc_min_interval, now) &&
1004 	    atomic_read(&ip6_dst_ops.entries) <= ip6_rt_max_size)
1005 		goto out;
1006 
1007 	expire++;
1008 	fib6_run_gc(expire);
1009 	last_gc = now;
1010 	if (atomic_read(&ip6_dst_ops.entries) < ip6_dst_ops.gc_thresh)
1011 		expire = ip6_rt_gc_timeout>>1;
1012 
1013 out:
1014 	expire -= expire>>ip6_rt_gc_elasticity;
1015 	return (atomic_read(&ip6_dst_ops.entries) > ip6_rt_max_size);
1016 }
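
/*
 * Behaviour sketch: 'expire' starts at 30 seconds.  Each time the GC
 * actually runs (the minimum interval has elapsed or the cache exceeds
 * ip6_rt_max_size) it is bumped by one jiffy, it snaps back to
 * ip6_rt_gc_timeout/2 (30 seconds) once the entry count drops below
 * gc_thresh, and it decays by expire >> ip6_rt_gc_elasticity on every
 * invocation of ip6_dst_gc().
 */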
1017 
1018 /* Clean host part of a prefix. Not necessary in radix tree,
1019    but results in cleaner routing tables.
1020 
1021    Remove it only once everything else works!
1022  */
1023 
1024 static int ipv6_get_mtu(struct net_device *dev)
1025 {
1026 	int mtu = IPV6_MIN_MTU;
1027 	struct inet6_dev *idev;
1028 
1029 	idev = in6_dev_get(dev);
1030 	if (idev) {
1031 		mtu = idev->cnf.mtu6;
1032 		in6_dev_put(idev);
1033 	}
1034 	return mtu;
1035 }
1036 
1037 int ipv6_get_hoplimit(struct net_device *dev)
1038 {
1039 	int hoplimit = ipv6_devconf.hop_limit;
1040 	struct inet6_dev *idev;
1041 
1042 	idev = in6_dev_get(dev);
1043 	if (idev) {
1044 		hoplimit = idev->cnf.hop_limit;
1045 		in6_dev_put(idev);
1046 	}
1047 	return hoplimit;
1048 }
1049 
1050 /*
1051  *
1052  */
1053 
1054 int ip6_route_add(struct fib6_config *cfg)
1055 {
1056 	int err;
1057 	struct rt6_info *rt = NULL;
1058 	struct net_device *dev = NULL;
1059 	struct inet6_dev *idev = NULL;
1060 	struct fib6_table *table;
1061 	int addr_type;
1062 
1063 	if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
1064 		return -EINVAL;
1065 #ifndef CONFIG_IPV6_SUBTREES
1066 	if (cfg->fc_src_len)
1067 		return -EINVAL;
1068 #endif
1069 	if (cfg->fc_ifindex) {
1070 		err = -ENODEV;
1071 		dev = dev_get_by_index(&init_net, cfg->fc_ifindex);
1072 		if (!dev)
1073 			goto out;
1074 		idev = in6_dev_get(dev);
1075 		if (!idev)
1076 			goto out;
1077 	}
1078 
1079 	if (cfg->fc_metric == 0)
1080 		cfg->fc_metric = IP6_RT_PRIO_USER;
1081 
1082 	table = fib6_new_table(cfg->fc_table);
1083 	if (table == NULL) {
1084 		err = -ENOBUFS;
1085 		goto out;
1086 	}
1087 
1088 	rt = ip6_dst_alloc();
1089 
1090 	if (rt == NULL) {
1091 		err = -ENOMEM;
1092 		goto out;
1093 	}
1094 
1095 	rt->u.dst.obsolete = -1;
1096 	rt->rt6i_expires = jiffies + clock_t_to_jiffies(cfg->fc_expires);
1097 
1098 	if (cfg->fc_protocol == RTPROT_UNSPEC)
1099 		cfg->fc_protocol = RTPROT_BOOT;
1100 	rt->rt6i_protocol = cfg->fc_protocol;
1101 
1102 	addr_type = ipv6_addr_type(&cfg->fc_dst);
1103 
1104 	if (addr_type & IPV6_ADDR_MULTICAST)
1105 		rt->u.dst.input = ip6_mc_input;
1106 	else
1107 		rt->u.dst.input = ip6_forward;
1108 
1109 	rt->u.dst.output = ip6_output;
1110 
1111 	ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1112 	rt->rt6i_dst.plen = cfg->fc_dst_len;
1113 	if (rt->rt6i_dst.plen == 128)
1114 	       rt->u.dst.flags = DST_HOST;
1115 
1116 #ifdef CONFIG_IPV6_SUBTREES
1117 	ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1118 	rt->rt6i_src.plen = cfg->fc_src_len;
1119 #endif
1120 
1121 	rt->rt6i_metric = cfg->fc_metric;
1122 
1123 	/* We cannot add true routes via loopback here,
1124 	   they would result in kernel looping; promote them to reject routes
1125 	 */
1126 	if ((cfg->fc_flags & RTF_REJECT) ||
1127 	    (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK))) {
1128 		/* hold loopback dev/idev if we haven't done so. */
1129 		if (dev != init_net.loopback_dev) {
1130 			if (dev) {
1131 				dev_put(dev);
1132 				in6_dev_put(idev);
1133 			}
1134 			dev = init_net.loopback_dev;
1135 			dev_hold(dev);
1136 			idev = in6_dev_get(dev);
1137 			if (!idev) {
1138 				err = -ENODEV;
1139 				goto out;
1140 			}
1141 		}
1142 		rt->u.dst.output = ip6_pkt_discard_out;
1143 		rt->u.dst.input = ip6_pkt_discard;
1144 		rt->u.dst.error = -ENETUNREACH;
1145 		rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1146 		goto install_route;
1147 	}
1148 
1149 	if (cfg->fc_flags & RTF_GATEWAY) {
1150 		struct in6_addr *gw_addr;
1151 		int gwa_type;
1152 
1153 		gw_addr = &cfg->fc_gateway;
1154 		ipv6_addr_copy(&rt->rt6i_gateway, gw_addr);
1155 		gwa_type = ipv6_addr_type(gw_addr);
1156 
1157 		if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1158 			struct rt6_info *grt;
1159 
1160 			/* IPv6 strictly forbids using non-link-local
1161 			   addresses as nexthop addresses.
1162 			   Otherwise, a router will not be able to send redirects.
1163 			   It is very good, but in some (rare!) circumstances
1164 			   (SIT, PtP, NBMA NOARP links) it is handy to allow
1165 			   some exceptions. --ANK
1166 			 */
1167 			err = -EINVAL;
1168 			if (!(gwa_type&IPV6_ADDR_UNICAST))
1169 				goto out;
1170 
1171 			grt = rt6_lookup(gw_addr, NULL, cfg->fc_ifindex, 1);
1172 
1173 			err = -EHOSTUNREACH;
1174 			if (grt == NULL)
1175 				goto out;
1176 			if (dev) {
1177 				if (dev != grt->rt6i_dev) {
1178 					dst_release(&grt->u.dst);
1179 					goto out;
1180 				}
1181 			} else {
1182 				dev = grt->rt6i_dev;
1183 				idev = grt->rt6i_idev;
1184 				dev_hold(dev);
1185 				in6_dev_hold(grt->rt6i_idev);
1186 			}
1187 			if (!(grt->rt6i_flags&RTF_GATEWAY))
1188 				err = 0;
1189 			dst_release(&grt->u.dst);
1190 
1191 			if (err)
1192 				goto out;
1193 		}
1194 		err = -EINVAL;
1195 		if (dev == NULL || (dev->flags&IFF_LOOPBACK))
1196 			goto out;
1197 	}
1198 
1199 	err = -ENODEV;
1200 	if (dev == NULL)
1201 		goto out;
1202 
1203 	if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
1204 		rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
1205 		if (IS_ERR(rt->rt6i_nexthop)) {
1206 			err = PTR_ERR(rt->rt6i_nexthop);
1207 			rt->rt6i_nexthop = NULL;
1208 			goto out;
1209 		}
1210 	}
1211 
1212 	rt->rt6i_flags = cfg->fc_flags;
1213 
1214 install_route:
1215 	if (cfg->fc_mx) {
1216 		struct nlattr *nla;
1217 		int remaining;
1218 
1219 		nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
1220 			int type = nla_type(nla);
1221 
1222 			if (type) {
1223 				if (type > RTAX_MAX) {
1224 					err = -EINVAL;
1225 					goto out;
1226 				}
1227 
1228 				rt->u.dst.metrics[type - 1] = nla_get_u32(nla);
1229 			}
1230 		}
1231 	}
1232 
1233 	if (rt->u.dst.metrics[RTAX_HOPLIMIT-1] == 0)
1234 		rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
1235 	if (!rt->u.dst.metrics[RTAX_MTU-1])
1236 		rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(dev);
1237 	if (!rt->u.dst.metrics[RTAX_ADVMSS-1])
1238 		rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
1239 	rt->u.dst.dev = dev;
1240 	rt->rt6i_idev = idev;
1241 	rt->rt6i_table = table;
1242 	return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
1243 
1244 out:
1245 	if (dev)
1246 		dev_put(dev);
1247 	if (idev)
1248 		in6_dev_put(idev);
1249 	if (rt)
1250 		dst_free(&rt->u.dst);
1251 	return err;
1252 }
1253 
1254 static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1255 {
1256 	int err;
1257 	struct fib6_table *table;
1258 
1259 	if (rt == &ip6_null_entry)
1260 		return -ENOENT;
1261 
1262 	table = rt->rt6i_table;
1263 	write_lock_bh(&table->tb6_lock);
1264 
1265 	err = fib6_del(rt, info);
1266 	dst_release(&rt->u.dst);
1267 
1268 	write_unlock_bh(&table->tb6_lock);
1269 
1270 	return err;
1271 }
1272 
1273 int ip6_del_rt(struct rt6_info *rt)
1274 {
1275 	return __ip6_del_rt(rt, NULL);
1276 }
1277 
1278 static int ip6_route_del(struct fib6_config *cfg)
1279 {
1280 	struct fib6_table *table;
1281 	struct fib6_node *fn;
1282 	struct rt6_info *rt;
1283 	int err = -ESRCH;
1284 
1285 	table = fib6_get_table(cfg->fc_table);
1286 	if (table == NULL)
1287 		return err;
1288 
1289 	read_lock_bh(&table->tb6_lock);
1290 
1291 	fn = fib6_locate(&table->tb6_root,
1292 			 &cfg->fc_dst, cfg->fc_dst_len,
1293 			 &cfg->fc_src, cfg->fc_src_len);
1294 
1295 	if (fn) {
1296 		for (rt = fn->leaf; rt; rt = rt->u.dst.rt6_next) {
1297 			if (cfg->fc_ifindex &&
1298 			    (rt->rt6i_dev == NULL ||
1299 			     rt->rt6i_dev->ifindex != cfg->fc_ifindex))
1300 				continue;
1301 			if (cfg->fc_flags & RTF_GATEWAY &&
1302 			    !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1303 				continue;
1304 			if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1305 				continue;
1306 			dst_hold(&rt->u.dst);
1307 			read_unlock_bh(&table->tb6_lock);
1308 
1309 			return __ip6_del_rt(rt, &cfg->fc_nlinfo);
1310 		}
1311 	}
1312 	read_unlock_bh(&table->tb6_lock);
1313 
1314 	return err;
1315 }
1316 
1317 /*
1318  *	Handle redirects
1319  */
1320 struct ip6rd_flowi {
1321 	struct flowi fl;
1322 	struct in6_addr gateway;
1323 };
1324 
1325 static struct rt6_info *__ip6_route_redirect(struct fib6_table *table,
1326 					     struct flowi *fl,
1327 					     int flags)
1328 {
1329 	struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl;
1330 	struct rt6_info *rt;
1331 	struct fib6_node *fn;
1332 
1333 	/*
1334 	 * Get the "current" route for this destination and
1335 	 * check if the redirect has come from the appropriate router.
1336 	 *
1337 	 * RFC 2461 specifies that redirects should only be
1338 	 * accepted if they come from the nexthop to the target.
1339 	 * Due to the way the routes are chosen, this notion
1340 	 * is a bit fuzzy and one might need to check all possible
1341 	 * routes.
1342 	 */
1343 
1344 	read_lock_bh(&table->tb6_lock);
1345 	fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
1346 restart:
1347 	for (rt = fn->leaf; rt; rt = rt->u.dst.rt6_next) {
1348 		/*
1349 		 * Current route is on-link; redirect is always invalid.
1350 		 *
1351 		 * It seems the previous statement is not true: the target could
1352 		 * be a node which regards us as on-link (e.g. proxy ndisc),
1353 		 * but then the router serving it might decide that we should
1354 		 * know the truth 8)8) --ANK (980726).
1355 		 */
1356 		if (rt6_check_expired(rt))
1357 			continue;
1358 		if (!(rt->rt6i_flags & RTF_GATEWAY))
1359 			continue;
1360 		if (fl->oif != rt->rt6i_dev->ifindex)
1361 			continue;
1362 		if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
1363 			continue;
1364 		break;
1365 	}
1366 
1367 	if (!rt)
1368 		rt = &ip6_null_entry;
1369 	BACKTRACK(&fl->fl6_src);
1370 out:
1371 	dst_hold(&rt->u.dst);
1372 
1373 	read_unlock_bh(&table->tb6_lock);
1374 
1375 	return rt;
1376 };
1377 
1378 static struct rt6_info *ip6_route_redirect(struct in6_addr *dest,
1379 					   struct in6_addr *src,
1380 					   struct in6_addr *gateway,
1381 					   struct net_device *dev)
1382 {
1383 	int flags = RT6_LOOKUP_F_HAS_SADDR;
1384 	struct ip6rd_flowi rdfl = {
1385 		.fl = {
1386 			.oif = dev->ifindex,
1387 			.nl_u = {
1388 				.ip6_u = {
1389 					.daddr = *dest,
1390 					.saddr = *src,
1391 				},
1392 			},
1393 		},
1394 		.gateway = *gateway,
1395 	};
1396 
1397 	if (rt6_need_strict(dest))
1398 		flags |= RT6_LOOKUP_F_IFACE;
1399 
1400 	return (struct rt6_info *)fib6_rule_lookup((struct flowi *)&rdfl, flags, __ip6_route_redirect);
1401 }
1402 
1403 void rt6_redirect(struct in6_addr *dest, struct in6_addr *src,
1404 		  struct in6_addr *saddr,
1405 		  struct neighbour *neigh, u8 *lladdr, int on_link)
1406 {
1407 	struct rt6_info *rt, *nrt = NULL;
1408 	struct netevent_redirect netevent;
1409 
1410 	rt = ip6_route_redirect(dest, src, saddr, neigh->dev);
1411 
1412 	if (rt == &ip6_null_entry) {
1413 		if (net_ratelimit())
1414 			printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
1415 			       "for redirect target\n");
1416 		goto out;
1417 	}
1418 
1419 	/*
1420 	 *	We have finally decided to accept it.
1421 	 */
1422 
1423 	neigh_update(neigh, lladdr, NUD_STALE,
1424 		     NEIGH_UPDATE_F_WEAK_OVERRIDE|
1425 		     NEIGH_UPDATE_F_OVERRIDE|
1426 		     (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1427 				     NEIGH_UPDATE_F_ISROUTER))
1428 		     );
1429 
1430 	/*
1431 	 * Redirect received -> path was valid.
1432 	 * Look, redirects are sent only in response to data packets,
1433 	 * so this nexthop apparently is reachable. --ANK
1434 	 */
1435 	dst_confirm(&rt->u.dst);
1436 
1437 	/* Duplicate redirect: silently ignore. */
1438 	if (neigh == rt->u.dst.neighbour)
1439 		goto out;
1440 
1441 	nrt = ip6_rt_copy(rt);
1442 	if (nrt == NULL)
1443 		goto out;
1444 
1445 	nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1446 	if (on_link)
1447 		nrt->rt6i_flags &= ~RTF_GATEWAY;
1448 
1449 	ipv6_addr_copy(&nrt->rt6i_dst.addr, dest);
1450 	nrt->rt6i_dst.plen = 128;
1451 	nrt->u.dst.flags |= DST_HOST;
1452 
1453 	ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
1454 	nrt->rt6i_nexthop = neigh_clone(neigh);
1455 	/* Reset pmtu, it may be better */
1456 	nrt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(neigh->dev);
1457 	nrt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&nrt->u.dst));
1458 
1459 	if (ip6_ins_rt(nrt))
1460 		goto out;
1461 
1462 	netevent.old = &rt->u.dst;
1463 	netevent.new = &nrt->u.dst;
1464 	call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1465 
1466 	if (rt->rt6i_flags&RTF_CACHE) {
1467 		ip6_del_rt(rt);
1468 		return;
1469 	}
1470 
1471 out:
1472 	dst_release(&rt->u.dst);
1473 	return;
1474 }
1475 
1476 /*
1477  *	Handle ICMP "packet too big" messages
1478  *	i.e. Path MTU discovery
1479  */
1480 
1481 void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
1482 			struct net_device *dev, u32 pmtu)
1483 {
1484 	struct rt6_info *rt, *nrt;
1485 	int allfrag = 0;
1486 
1487 	rt = rt6_lookup(daddr, saddr, dev->ifindex, 0);
1488 	if (rt == NULL)
1489 		return;
1490 
1491 	if (pmtu >= dst_mtu(&rt->u.dst))
1492 		goto out;
1493 
1494 	if (pmtu < IPV6_MIN_MTU) {
1495 		/*
1496 		 * According to RFC 2460, the PMTU is set to the IPv6 Minimum Link
1497 		 * MTU (1280) and a Fragment header should always be included
1498 		 * once a node has received a Packet Too Big message reporting a
1499 		 * PMTU less than the IPv6 Minimum Link MTU.
1500 		 */
1501 		pmtu = IPV6_MIN_MTU;
1502 		allfrag = 1;
1503 	}
1504 
1505 	/* New mtu received -> path was valid.
1506 	   Too Big messages are sent only in response to data packets,
1507 	   so this nexthop apparently is reachable. --ANK
1508 	 */
1509 	dst_confirm(&rt->u.dst);
1510 
1511 	/* Host route. If it is static, it would be better
1512 	   not to override it but to add a new one, so that
1513 	   when the cache entry expires the old pmtu
1514 	   is restored automatically.
1515 	 */
1516 	if (rt->rt6i_flags & RTF_CACHE) {
1517 		rt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1518 		if (allfrag)
1519 			rt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1520 		dst_set_expires(&rt->u.dst, ip6_rt_mtu_expires);
1521 		rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
1522 		goto out;
1523 	}
1524 
1525 	/* Network route.
1526 	   Two cases are possible:
1527 	   1. It is a connected route. Action: COW it.
1528 	   2. It is a gatewayed or NONEXTHOP route. Action: clone it.
1529 	 */
1530 	if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
1531 		nrt = rt6_alloc_cow(rt, daddr, saddr);
1532 	else
1533 		nrt = rt6_alloc_clone(rt, daddr);
1534 
1535 	if (nrt) {
1536 		nrt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1537 		if (allfrag)
1538 			nrt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1539 
1540 		/* According to RFC 1981, detection of a PMTU increase shouldn't
1541 		 * happen within 5 minutes; the recommended timer is 10 minutes.
1542 		 * Here the route expiration time is set to ip6_rt_mtu_expires,
1543 		 * which is 10 minutes. After 10 minutes the decreased pmtu expires
1544 		 * and detection of a PMTU increase happens automatically.
1545 		 */
1546 		dst_set_expires(&nrt->u.dst, ip6_rt_mtu_expires);
1547 		nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
1548 
1549 		ip6_ins_rt(nrt);
1550 	}
1551 out:
1552 	dst_release(&rt->u.dst);
1553 }
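
/*
 * Example: if an ICMPv6 Packet Too Big reporting an MTU of 1400 arrives
 * for a destination whose cached host route currently carries a larger
 * MTU, RTAX_MTU on that route is lowered to 1400 and the entry is
 * marked RTF_MODIFIED|RTF_EXPIRES with a 10-minute (ip6_rt_mtu_expires)
 * lifetime.  For a non-cached network route, a host clone carrying the
 * new MTU is created and inserted instead.
 */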
1554 
1555 /*
1556  *	Misc support functions
1557  */
1558 
1559 static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
1560 {
1561 	struct rt6_info *rt = ip6_dst_alloc();
1562 
1563 	if (rt) {
1564 		rt->u.dst.input = ort->u.dst.input;
1565 		rt->u.dst.output = ort->u.dst.output;
1566 
1567 		memcpy(rt->u.dst.metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
1568 		rt->u.dst.error = ort->u.dst.error;
1569 		rt->u.dst.dev = ort->u.dst.dev;
1570 		if (rt->u.dst.dev)
1571 			dev_hold(rt->u.dst.dev);
1572 		rt->rt6i_idev = ort->rt6i_idev;
1573 		if (rt->rt6i_idev)
1574 			in6_dev_hold(rt->rt6i_idev);
1575 		rt->u.dst.lastuse = jiffies;
1576 		rt->rt6i_expires = 0;
1577 
1578 		ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
1579 		rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
1580 		rt->rt6i_metric = 0;
1581 
1582 		memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1583 #ifdef CONFIG_IPV6_SUBTREES
1584 		memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1585 #endif
1586 		rt->rt6i_table = ort->rt6i_table;
1587 	}
1588 	return rt;
1589 }
1590 
1591 #ifdef CONFIG_IPV6_ROUTE_INFO
1592 static struct rt6_info *rt6_get_route_info(struct in6_addr *prefix, int prefixlen,
1593 					   struct in6_addr *gwaddr, int ifindex)
1594 {
1595 	struct fib6_node *fn;
1596 	struct rt6_info *rt = NULL;
1597 	struct fib6_table *table;
1598 
1599 	table = fib6_get_table(RT6_TABLE_INFO);
1600 	if (table == NULL)
1601 		return NULL;
1602 
1603 	write_lock_bh(&table->tb6_lock);
1604 	fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
1605 	if (!fn)
1606 		goto out;
1607 
1608 	for (rt = fn->leaf; rt; rt = rt->u.dst.rt6_next) {
1609 		if (rt->rt6i_dev->ifindex != ifindex)
1610 			continue;
1611 		if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1612 			continue;
1613 		if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1614 			continue;
1615 		dst_hold(&rt->u.dst);
1616 		break;
1617 	}
1618 out:
1619 	write_unlock_bh(&table->tb6_lock);
1620 	return rt;
1621 }
1622 
1623 static struct rt6_info *rt6_add_route_info(struct in6_addr *prefix, int prefixlen,
1624 					   struct in6_addr *gwaddr, int ifindex,
1625 					   unsigned pref)
1626 {
1627 	struct fib6_config cfg = {
1628 		.fc_table	= RT6_TABLE_INFO,
1629 		.fc_metric	= 1024,
1630 		.fc_ifindex	= ifindex,
1631 		.fc_dst_len	= prefixlen,
1632 		.fc_flags	= RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1633 				  RTF_UP | RTF_PREF(pref),
1634 	};
1635 
1636 	ipv6_addr_copy(&cfg.fc_dst, prefix);
1637 	ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
1638 
1639 	/* We should treat it as a default route if prefix length is 0. */
1640 	if (!prefixlen)
1641 		cfg.fc_flags |= RTF_DEFAULT;
1642 
1643 	ip6_route_add(&cfg);
1644 
1645 	return rt6_get_route_info(prefix, prefixlen, gwaddr, ifindex);
1646 }
1647 #endif
1648 
1649 struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *dev)
1650 {
1651 	struct rt6_info *rt;
1652 	struct fib6_table *table;
1653 
1654 	table = fib6_get_table(RT6_TABLE_DFLT);
1655 	if (table == NULL)
1656 		return NULL;
1657 
1658 	write_lock_bh(&table->tb6_lock);
1659 	for (rt = table->tb6_root.leaf; rt; rt=rt->u.dst.rt6_next) {
1660 		if (dev == rt->rt6i_dev &&
1661 		    ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1662 		    ipv6_addr_equal(&rt->rt6i_gateway, addr))
1663 			break;
1664 	}
1665 	if (rt)
1666 		dst_hold(&rt->u.dst);
1667 	write_unlock_bh(&table->tb6_lock);
1668 	return rt;
1669 }
1670 
1671 struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr,
1672 				     struct net_device *dev,
1673 				     unsigned int pref)
1674 {
1675 	struct fib6_config cfg = {
1676 		.fc_table	= RT6_TABLE_DFLT,
1677 		.fc_metric	= 1024,
1678 		.fc_ifindex	= dev->ifindex,
1679 		.fc_flags	= RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
1680 				  RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
1681 	};
1682 
1683 	ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
1684 
1685 	ip6_route_add(&cfg);
1686 
1687 	return rt6_get_dflt_router(gwaddr, dev);
1688 }
1689 
1690 void rt6_purge_dflt_routers(void)
1691 {
1692 	struct rt6_info *rt;
1693 	struct fib6_table *table;
1694 
1695 	/* NOTE: Keep consistent with rt6_get_dflt_router */
1696 	table = fib6_get_table(RT6_TABLE_DFLT);
1697 	if (table == NULL)
1698 		return;
1699 
1700 restart:
1701 	read_lock_bh(&table->tb6_lock);
1702 	for (rt = table->tb6_root.leaf; rt; rt = rt->u.dst.rt6_next) {
1703 		if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
1704 			dst_hold(&rt->u.dst);
1705 			read_unlock_bh(&table->tb6_lock);
1706 			ip6_del_rt(rt);
1707 			goto restart;
1708 		}
1709 	}
1710 	read_unlock_bh(&table->tb6_lock);
1711 }
1712 
1713 static void rtmsg_to_fib6_config(struct in6_rtmsg *rtmsg,
1714 				 struct fib6_config *cfg)
1715 {
1716 	memset(cfg, 0, sizeof(*cfg));
1717 
1718 	cfg->fc_table = RT6_TABLE_MAIN;
1719 	cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
1720 	cfg->fc_metric = rtmsg->rtmsg_metric;
1721 	cfg->fc_expires = rtmsg->rtmsg_info;
1722 	cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
1723 	cfg->fc_src_len = rtmsg->rtmsg_src_len;
1724 	cfg->fc_flags = rtmsg->rtmsg_flags;
1725 
1726 	ipv6_addr_copy(&cfg->fc_dst, &rtmsg->rtmsg_dst);
1727 	ipv6_addr_copy(&cfg->fc_src, &rtmsg->rtmsg_src);
1728 	ipv6_addr_copy(&cfg->fc_gateway, &rtmsg->rtmsg_gateway);
1729 }
1730 
1731 int ipv6_route_ioctl(unsigned int cmd, void __user *arg)
1732 {
1733 	struct fib6_config cfg;
1734 	struct in6_rtmsg rtmsg;
1735 	int err;
1736 
1737 	switch(cmd) {
1738 	case SIOCADDRT:		/* Add a route */
1739 	case SIOCDELRT:		/* Delete a route */
1740 		if (!capable(CAP_NET_ADMIN))
1741 			return -EPERM;
1742 		err = copy_from_user(&rtmsg, arg,
1743 				     sizeof(struct in6_rtmsg));
1744 		if (err)
1745 			return -EFAULT;
1746 
1747 		rtmsg_to_fib6_config(&rtmsg, &cfg);
1748 
1749 		rtnl_lock();
1750 		switch (cmd) {
1751 		case SIOCADDRT:
1752 			err = ip6_route_add(&cfg);
1753 			break;
1754 		case SIOCDELRT:
1755 			err = ip6_route_del(&cfg);
1756 			break;
1757 		default:
1758 			err = -EINVAL;
1759 		}
1760 		rtnl_unlock();
1761 
1762 		return err;
1763 	}
1764 
1765 	return -EINVAL;
1766 }
1767 
1768 /*
1769  *	Drop the packet on the floor
1770  */
1771 
1772 static inline int ip6_pkt_drop(struct sk_buff *skb, int code,
1773 			       int ipstats_mib_noroutes)
1774 {
1775 	int type;
1776 	switch (ipstats_mib_noroutes) {
1777 	case IPSTATS_MIB_INNOROUTES:
1778 		type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
1779 		if (type == IPV6_ADDR_ANY || type == IPV6_ADDR_RESERVED) {
1780 			IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_INADDRERRORS);
1781 			break;
1782 		}
1783 		/* FALLTHROUGH */
1784 	case IPSTATS_MIB_OUTNOROUTES:
1785 		IP6_INC_STATS(ip6_dst_idev(skb->dst), ipstats_mib_noroutes);
1786 		break;
1787 	}
1788 	icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0, skb->dev);
1789 	kfree_skb(skb);
1790 	return 0;
1791 }
1792 
1793 static int ip6_pkt_discard(struct sk_buff *skb)
1794 {
1795 	return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
1796 }
1797 
1798 static int ip6_pkt_discard_out(struct sk_buff *skb)
1799 {
1800 	skb->dev = skb->dst->dev;
1801 	return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
1802 }
1803 
1804 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
1805 
1806 static int ip6_pkt_prohibit(struct sk_buff *skb)
1807 {
1808 	return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
1809 }
1810 
1811 static int ip6_pkt_prohibit_out(struct sk_buff *skb)
1812 {
1813 	skb->dev = skb->dst->dev;
1814 	return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
1815 }
1816 
1817 static int ip6_pkt_blk_hole(struct sk_buff *skb)
1818 {
1819 	kfree_skb(skb);
1820 	return 0;
1821 }
1822 
1823 #endif
1824 
1825 /*
1826  *	Allocate a dst for local (unicast / anycast) address.
1827  */
1828 
1829 struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
1830 				    const struct in6_addr *addr,
1831 				    int anycast)
1832 {
1833 	struct rt6_info *rt = ip6_dst_alloc();
1834 
1835 	if (rt == NULL)
1836 		return ERR_PTR(-ENOMEM);
1837 
1838 	dev_hold(init_net.loopback_dev);
1839 	in6_dev_hold(idev);
1840 
1841 	rt->u.dst.flags = DST_HOST;
1842 	rt->u.dst.input = ip6_input;
1843 	rt->u.dst.output = ip6_output;
1844 	rt->rt6i_dev = init_net.loopback_dev;
1845 	rt->rt6i_idev = idev;
1846 	rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
1847 	rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
1848 	rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
1849 	rt->u.dst.obsolete = -1;
1850 
1851 	rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
1852 	if (anycast)
1853 		rt->rt6i_flags |= RTF_ANYCAST;
1854 	else
1855 		rt->rt6i_flags |= RTF_LOCAL;
1856 	rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
1857 	if (rt->rt6i_nexthop == NULL) {
1858 		dst_free(&rt->u.dst);
1859 		return ERR_PTR(-ENOMEM);
1860 	}
1861 
1862 	ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1863 	rt->rt6i_dst.plen = 128;
1864 	rt->rt6i_table = fib6_get_table(RT6_TABLE_LOCAL);
1865 
1866 	atomic_set(&rt->u.dst.__refcnt, 1);
1867 
1868 	return rt;
1869 }
1870 
1871 static int fib6_ifdown(struct rt6_info *rt, void *arg)
1872 {
1873 	if (((void*)rt->rt6i_dev == arg || arg == NULL) &&
1874 	    rt != &ip6_null_entry) {
1875 		RT6_TRACE("deleted by ifdown %p\n", rt);
1876 		return -1;
1877 	}
1878 	return 0;
1879 }
1880 
1881 void rt6_ifdown(struct net_device *dev)
1882 {
1883 	fib6_clean_all(fib6_ifdown, 0, dev);
1884 }
1885 
1886 struct rt6_mtu_change_arg
1887 {
1888 	struct net_device *dev;
1889 	unsigned mtu;
1890 };
1891 
1892 static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
1893 {
1894 	struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
1895 	struct inet6_dev *idev;
1896 
1897 	/* In IPv6 pmtu discovery is not optional,
1898 	   so the RTAX_MTU lock cannot disable it.
1899 	   We still use this lock to block changes
1900 	   caused by addrconf/ndisc.
1901 	*/
1902 
1903 	idev = __in6_dev_get(arg->dev);
1904 	if (idev == NULL)
1905 		return 0;
1906 
1907 	/* When the MTU is increased administratively, PMTU discovery has no
1908 	   way to detect the IPv6 PMTU increase, so the PMTU must be updated
1909 	   here. Since RFC 1981 doesn't cover administrative MTU increases,
1910 	   updating the PMTU here is a MUST (e.g. for jumbo frames).
1911 	 */
1912 	/*
1913 	   If the new MTU is less than the route PMTU, the new MTU will be
1914 	   the lowest MTU in the path; update the route PMTU to reflect the
1915 	   decrease. If the new MTU is greater than the route PMTU, and the
1916 	   old MTU was the lowest MTU in the path, update the route PMTU to
1917 	   reflect the increase. In that case, if another node's MTU is now
1918 	   the lowest in the path, its Packet Too Big messages will drive
1919 	   PMTU discovery again.
1920 	 */
1921 	if (rt->rt6i_dev == arg->dev &&
1922 	    !dst_metric_locked(&rt->u.dst, RTAX_MTU) &&
1923 	    (dst_mtu(&rt->u.dst) > arg->mtu ||
1924 	     (dst_mtu(&rt->u.dst) < arg->mtu &&
1925 	      dst_mtu(&rt->u.dst) == idev->cnf.mtu6))) {
1926 		rt->u.dst.metrics[RTAX_MTU-1] = arg->mtu;
1927 		rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(arg->mtu);
1928 	}
1929 	return 0;
1930 }
1931 
1932 void rt6_mtu_change(struct net_device *dev, unsigned mtu)
1933 {
1934 	struct rt6_mtu_change_arg arg = {
1935 		.dev = dev,
1936 		.mtu = mtu,
1937 	};
1938 
1939 	fib6_clean_all(rt6_mtu_change_route, 0, &arg);
1940 }
1941 
1942 static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
1943 	[RTA_GATEWAY]           = { .len = sizeof(struct in6_addr) },
1944 	[RTA_OIF]               = { .type = NLA_U32 },
1945 	[RTA_IIF]		= { .type = NLA_U32 },
1946 	[RTA_PRIORITY]          = { .type = NLA_U32 },
1947 	[RTA_METRICS]           = { .type = NLA_NESTED },
1948 };
1949 
1950 static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
1951 			      struct fib6_config *cfg)
1952 {
1953 	struct rtmsg *rtm;
1954 	struct nlattr *tb[RTA_MAX+1];
1955 	int err;
1956 
1957 	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
1958 	if (err < 0)
1959 		goto errout;
1960 
1961 	err = -EINVAL;
1962 	rtm = nlmsg_data(nlh);
1963 	memset(cfg, 0, sizeof(*cfg));
1964 
1965 	cfg->fc_table = rtm->rtm_table;
1966 	cfg->fc_dst_len = rtm->rtm_dst_len;
1967 	cfg->fc_src_len = rtm->rtm_src_len;
1968 	cfg->fc_flags = RTF_UP;
1969 	cfg->fc_protocol = rtm->rtm_protocol;
1970 
1971 	if (rtm->rtm_type == RTN_UNREACHABLE)
1972 		cfg->fc_flags |= RTF_REJECT;
1973 
1974 	cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
1975 	cfg->fc_nlinfo.nlh = nlh;
1976 
1977 	if (tb[RTA_GATEWAY]) {
1978 		nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
1979 		cfg->fc_flags |= RTF_GATEWAY;
1980 	}
1981 
1982 	if (tb[RTA_DST]) {
1983 		int plen = (rtm->rtm_dst_len + 7) >> 3;
1984 
1985 		if (nla_len(tb[RTA_DST]) < plen)
1986 			goto errout;
1987 
1988 		nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
1989 	}
1990 
1991 	if (tb[RTA_SRC]) {
1992 		int plen = (rtm->rtm_src_len + 7) >> 3;
1993 
1994 		if (nla_len(tb[RTA_SRC]) < plen)
1995 			goto errout;
1996 
1997 		nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
1998 	}
1999 
2000 	if (tb[RTA_OIF])
2001 		cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2002 
2003 	if (tb[RTA_PRIORITY])
2004 		cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2005 
2006 	if (tb[RTA_METRICS]) {
2007 		cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2008 		cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
2009 	}
2010 
2011 	if (tb[RTA_TABLE])
2012 		cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2013 
2014 	err = 0;
2015 errout:
2016 	return err;
2017 }
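
/*
 * For illustration, a userspace request such as
 * "ip -6 route add 2001:db8::/32 via fe80::1 dev eth0 metric 1024"
 * arrives as an RTM_NEWROUTE message whose attributes map through this
 * function roughly as: rtm_dst_len/RTA_DST -> fc_dst_len/fc_dst,
 * RTA_GATEWAY -> fc_gateway (setting RTF_GATEWAY), RTA_OIF ->
 * fc_ifindex and RTA_PRIORITY -> fc_metric, before inet6_rtm_newroute()
 * hands the result to ip6_route_add().
 */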
2018 
2019 static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
2020 {
2021 	struct fib6_config cfg;
2022 	int err;
2023 
2024 	err = rtm_to_fib6_config(skb, nlh, &cfg);
2025 	if (err < 0)
2026 		return err;
2027 
2028 	return ip6_route_del(&cfg);
2029 }
2030 
2031 static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
2032 {
2033 	struct fib6_config cfg;
2034 	int err;
2035 
2036 	err = rtm_to_fib6_config(skb, nlh, &cfg);
2037 	if (err < 0)
2038 		return err;
2039 
2040 	return ip6_route_add(&cfg);
2041 }
2042 
2043 static inline size_t rt6_nlmsg_size(void)
2044 {
2045 	return NLMSG_ALIGN(sizeof(struct rtmsg))
2046 	       + nla_total_size(16) /* RTA_SRC */
2047 	       + nla_total_size(16) /* RTA_DST */
2048 	       + nla_total_size(16) /* RTA_GATEWAY */
2049 	       + nla_total_size(16) /* RTA_PREFSRC */
2050 	       + nla_total_size(4) /* RTA_TABLE */
2051 	       + nla_total_size(4) /* RTA_IIF */
2052 	       + nla_total_size(4) /* RTA_OIF */
2053 	       + nla_total_size(4) /* RTA_PRIORITY */
2054 	       + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
2055 	       + nla_total_size(sizeof(struct rta_cacheinfo));
2056 }
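
/*
 * Rough userspace illustration of the arithmetic above: in the kernel,
 * nla_total_size(n) is NLA_ALIGN(NLA_HDRLEN + n), so the worst-case payload
 * reserved for a route notification can be recomputed with the uapi macros.
 * The TOTAL() macro is a local stand-in for that helper, and RTAX_MAX is
 * whatever the kernel headers on the build host define.
 */
#if 0	/* illustrative userspace sketch, not part of the kernel build */
#include <stdio.h>
#include <linux/netlink.h>
#include <linux/rtnetlink.h>

#define TOTAL(n)	NLA_ALIGN(NLA_HDRLEN + (n))	/* mirrors nla_total_size() */

int main(void)
{
	size_t size = NLMSG_ALIGN(sizeof(struct rtmsg))
		+ 4 * TOTAL(16)				/* SRC, DST, GATEWAY, PREFSRC */
		+ 4 * TOTAL(4)				/* TABLE, IIF, OIF, PRIORITY */
		+ RTAX_MAX * TOTAL(4)			/* RTA_METRICS entries */
		+ TOTAL(sizeof(struct rta_cacheinfo));

	printf("worst-case route message payload: %zu bytes\n", size);
	return 0;
}
#endif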
2057 
2058 static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt,
2059 			 struct in6_addr *dst, struct in6_addr *src,
2060 			 int iif, int type, u32 pid, u32 seq,
2061 			 int prefix, unsigned int flags)
2062 {
2063 	struct rtmsg *rtm;
2064 	struct nlmsghdr *nlh;
2065 	long expires;
2066 	u32 table;
2067 
2068 	if (prefix) {	/* user wants prefix routes only */
2069 		if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
2070 			/* success since this is not a prefix route */
2071 			return 1;
2072 		}
2073 	}
2074 
2075 	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtm), flags);
2076 	if (nlh == NULL)
2077 		return -EMSGSIZE;
2078 
2079 	rtm = nlmsg_data(nlh);
2080 	rtm->rtm_family = AF_INET6;
2081 	rtm->rtm_dst_len = rt->rt6i_dst.plen;
2082 	rtm->rtm_src_len = rt->rt6i_src.plen;
2083 	rtm->rtm_tos = 0;
2084 	if (rt->rt6i_table)
2085 		table = rt->rt6i_table->tb6_id;
2086 	else
2087 		table = RT6_TABLE_UNSPEC;
2088 	rtm->rtm_table = table;
2089 	NLA_PUT_U32(skb, RTA_TABLE, table);
2090 	if (rt->rt6i_flags&RTF_REJECT)
2091 		rtm->rtm_type = RTN_UNREACHABLE;
2092 	else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
2093 		rtm->rtm_type = RTN_LOCAL;
2094 	else
2095 		rtm->rtm_type = RTN_UNICAST;
2096 	rtm->rtm_flags = 0;
2097 	rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2098 	rtm->rtm_protocol = rt->rt6i_protocol;
2099 	if (rt->rt6i_flags&RTF_DYNAMIC)
2100 		rtm->rtm_protocol = RTPROT_REDIRECT;
2101 	else if (rt->rt6i_flags & RTF_ADDRCONF)
2102 		rtm->rtm_protocol = RTPROT_KERNEL;
2103 	else if (rt->rt6i_flags&RTF_DEFAULT)
2104 		rtm->rtm_protocol = RTPROT_RA;
2105 
2106 	if (rt->rt6i_flags&RTF_CACHE)
2107 		rtm->rtm_flags |= RTM_F_CLONED;
2108 
2109 	if (dst) {
2110 		NLA_PUT(skb, RTA_DST, 16, dst);
2111 		rtm->rtm_dst_len = 128;
2112 	} else if (rtm->rtm_dst_len)
2113 		NLA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
2114 #ifdef CONFIG_IPV6_SUBTREES
2115 	if (src) {
2116 		NLA_PUT(skb, RTA_SRC, 16, src);
2117 		rtm->rtm_src_len = 128;
2118 	} else if (rtm->rtm_src_len)
2119 		NLA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
2120 #endif
2121 	if (iif)
2122 		NLA_PUT_U32(skb, RTA_IIF, iif);
2123 	else if (dst) {
2124 		struct in6_addr saddr_buf;
2125 		if (ipv6_get_saddr(&rt->u.dst, dst, &saddr_buf) == 0)
2126 			NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
2127 	}
2128 
2129 	if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0)
2130 		goto nla_put_failure;
2131 
2132 	if (rt->u.dst.neighbour)
2133 		NLA_PUT(skb, RTA_GATEWAY, 16, &rt->u.dst.neighbour->primary_key);
2134 
2135 	if (rt->u.dst.dev)
2136 		NLA_PUT_U32(skb, RTA_OIF, rt->rt6i_dev->ifindex);
2137 
2138 	NLA_PUT_U32(skb, RTA_PRIORITY, rt->rt6i_metric);
2139 
2140 	expires = rt->rt6i_expires ? rt->rt6i_expires - jiffies : 0;
2141 	if (rtnl_put_cacheinfo(skb, &rt->u.dst, 0, 0, 0,
2142 			       expires, rt->u.dst.error) < 0)
2143 		goto nla_put_failure;
2144 
2145 	return nlmsg_end(skb, nlh);
2146 
2147 nla_put_failure:
2148 	nlmsg_cancel(skb, nlh);
2149 	return -EMSGSIZE;
2150 }
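
/*
 * Userspace counterpart to rt6_fill_node(): given one RTM_NEWROUTE message
 * (for example from a dump), walk its attributes with the uapi
 * RTM_RTA()/RTA_OK()/RTA_NEXT() macros.  print_route() is a hypothetical
 * caller-supplied helper used for illustration, not a real API.
 */
#if 0	/* illustrative userspace sketch, not part of the kernel build */
#include <stdio.h>
#include <arpa/inet.h>
#include <linux/netlink.h>
#include <linux/rtnetlink.h>

static void print_route(const struct nlmsghdr *nlh)
{
	struct rtmsg *rtm = NLMSG_DATA(nlh);
	struct rtattr *rta = RTM_RTA(rtm);
	int len = RTM_PAYLOAD(nlh);
	char dst[INET6_ADDRSTRLEN] = "::";
	unsigned int oif = 0, prio = 0;

	for (; RTA_OK(rta, len); rta = RTA_NEXT(rta, len)) {
		switch (rta->rta_type) {
		case RTA_DST:
			inet_ntop(AF_INET6, RTA_DATA(rta), dst, sizeof(dst));
			break;
		case RTA_OIF:
			oif = *(unsigned int *)RTA_DATA(rta);
			break;
		case RTA_PRIORITY:
			prio = *(unsigned int *)RTA_DATA(rta);
			break;
		}
	}
	printf("%s/%u dev #%u metric %u\n", dst, rtm->rtm_dst_len, oif, prio);
}
#endif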
2151 
2152 int rt6_dump_route(struct rt6_info *rt, void *p_arg)
2153 {
2154 	struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2155 	int prefix;
2156 
2157 	if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2158 		struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
2159 		prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2160 	} else
2161 		prefix = 0;
2162 
2163 	return rt6_fill_node(arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
2164 		     NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
2165 		     prefix, NLM_F_MULTI);
2166 }
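
/*
 * Sketch of the request that drives rt6_dump_route(): an RTM_GETROUTE with
 * NLM_F_DUMP asks for the whole IPv6 table, and the kernel answers with a
 * multipart stream of RTM_NEWROUTE messages terminated by NLMSG_DONE.  Each
 * message can be decoded as in the sketch after rt6_fill_node() above.  The
 * buffer sizes are arbitrary example values.
 */
#if 0	/* illustrative userspace sketch, not part of the kernel build */
#include <string.h>
#include <unistd.h>
#include <sys/socket.h>
#include <linux/netlink.h>
#include <linux/rtnetlink.h>

int main(void)
{
	struct {
		struct nlmsghdr nlh;
		struct rtmsg rtm;
	} req;
	char buf[8192];
	int fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
	int done = 0;

	memset(&req, 0, sizeof(req));
	req.nlh.nlmsg_len = NLMSG_LENGTH(sizeof(req.rtm));
	req.nlh.nlmsg_type = RTM_GETROUTE;
	req.nlh.nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP;
	req.rtm.rtm_family = AF_INET6;

	if (fd < 0 || send(fd, &req, req.nlh.nlmsg_len, 0) < 0)
		return 1;

	while (!done) {
		int len = recv(fd, buf, sizeof(buf), 0);
		struct nlmsghdr *nlh = (struct nlmsghdr *)buf;

		if (len <= 0)
			break;
		for (; NLMSG_OK(nlh, len); nlh = NLMSG_NEXT(nlh, len)) {
			if (nlh->nlmsg_type == NLMSG_DONE) {
				done = 1;
				break;
			}
			/* nlh now points at one RTM_NEWROUTE; decode it here */
		}
	}
	close(fd);
	return 0;
}
#endif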
2167 
2168 static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
2169 {
2170 	struct nlattr *tb[RTA_MAX+1];
2171 	struct rt6_info *rt;
2172 	struct sk_buff *skb;
2173 	struct rtmsg *rtm;
2174 	struct flowi fl;
2175 	int err, iif = 0;
2176 
2177 	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2178 	if (err < 0)
2179 		goto errout;
2180 
2181 	err = -EINVAL;
2182 	memset(&fl, 0, sizeof(fl));
2183 
2184 	if (tb[RTA_SRC]) {
2185 		if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2186 			goto errout;
2187 
2188 		ipv6_addr_copy(&fl.fl6_src, nla_data(tb[RTA_SRC]));
2189 	}
2190 
2191 	if (tb[RTA_DST]) {
2192 		if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2193 			goto errout;
2194 
2195 		ipv6_addr_copy(&fl.fl6_dst, nla_data(tb[RTA_DST]));
2196 	}
2197 
2198 	if (tb[RTA_IIF])
2199 		iif = nla_get_u32(tb[RTA_IIF]);
2200 
2201 	if (tb[RTA_OIF])
2202 		fl.oif = nla_get_u32(tb[RTA_OIF]);
2203 
2204 	if (iif) {
2205 		struct net_device *dev;
2206 		dev = __dev_get_by_index(&init_net, iif);
2207 		if (!dev) {
2208 			err = -ENODEV;
2209 			goto errout;
2210 		}
2211 	}
2212 
2213 	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
2214 	if (skb == NULL) {
2215 		err = -ENOBUFS;
2216 		goto errout;
2217 	}
2218 
2219 	/* Reserve room for dummy headers; this skb can pass
2220 	   through a good chunk of the routing engine.
2221 	 */
2222 	skb_reset_mac_header(skb);
2223 	skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
2224 
2225 	rt = (struct rt6_info*) ip6_route_output(NULL, &fl);
2226 	skb->dst = &rt->u.dst;
2227 
2228 	err = rt6_fill_node(skb, rt, &fl.fl6_dst, &fl.fl6_src, iif,
2229 			    RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
2230 			    nlh->nlmsg_seq, 0, 0);
2231 	if (err < 0) {
2232 		kfree_skb(skb);
2233 		goto errout;
2234 	}
2235 
2236 	err = rtnl_unicast(skb, NETLINK_CB(in_skb).pid);
2237 errout:
2238 	return err;
2239 }
2240 
2241 void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
2242 {
2243 	struct sk_buff *skb;
2244 	u32 pid = 0, seq = 0;
2245 	struct nlmsghdr *nlh = NULL;
2246 	int err = -ENOBUFS;
2247 
2248 	if (info) {
2249 		pid = info->pid;
2250 		nlh = info->nlh;
2251 		if (nlh)
2252 			seq = nlh->nlmsg_seq;
2253 	}
2254 
2255 	skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
2256 	if (skb == NULL)
2257 		goto errout;
2258 
2259 	err = rt6_fill_node(skb, rt, NULL, NULL, 0, event, pid, seq, 0, 0);
2260 	if (err < 0) {
2261 		/* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2262 		WARN_ON(err == -EMSGSIZE);
2263 		kfree_skb(skb);
2264 		goto errout;
2265 	}
2266 	err = rtnl_notify(skb, pid, RTNLGRP_IPV6_ROUTE, nlh, gfp_any());
2267 errout:
2268 	if (err < 0)
2269 		rtnl_set_sk_err(RTNLGRP_IPV6_ROUTE, err);
2270 }
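
/*
 * Listener-side sketch for inet6_rt_notify(): a userspace socket bound with
 * the legacy RTMGRP_IPV6_ROUTE group mask receives the RTM_NEWROUTE and
 * RTM_DELROUTE messages sent here whenever an IPv6 route changes.  The
 * buffer size and the printed strings are example choices.
 */
#if 0	/* illustrative userspace sketch, not part of the kernel build */
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/socket.h>
#include <linux/netlink.h>
#include <linux/rtnetlink.h>

int main(void)
{
	struct sockaddr_nl sa;
	char buf[8192];
	int fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);

	if (fd < 0)
		return 1;
	memset(&sa, 0, sizeof(sa));
	sa.nl_family = AF_NETLINK;
	sa.nl_groups = RTMGRP_IPV6_ROUTE;	/* group used by inet6_rt_notify() */
	if (bind(fd, (struct sockaddr *)&sa, sizeof(sa)) < 0)
		return 1;

	for (;;) {
		int len = recv(fd, buf, sizeof(buf), 0);
		struct nlmsghdr *nlh = (struct nlmsghdr *)buf;

		if (len <= 0)
			break;
		for (; NLMSG_OK(nlh, len); nlh = NLMSG_NEXT(nlh, len)) {
			if (nlh->nlmsg_type == RTM_NEWROUTE)
				printf("route added\n");
			else if (nlh->nlmsg_type == RTM_DELROUTE)
				printf("route deleted\n");
		}
	}
	close(fd);
	return 0;
}
#endif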
2271 
2272 /*
2273  *	/proc
2274  */
2275 
2276 #ifdef CONFIG_PROC_FS
2277 
2278 #define RT6_INFO_LEN (32 + 4 + 32 + 4 + 32 + 40 + 5 + 1)
2279 
2280 struct rt6_proc_arg
2281 {
2282 	char *buffer;
2283 	int offset;
2284 	int length;
2285 	int skip;
2286 	int len;
2287 };
2288 
2289 static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2290 {
2291 	struct rt6_proc_arg *arg = (struct rt6_proc_arg *) p_arg;
2292 
2293 	if (arg->skip < arg->offset / RT6_INFO_LEN) {
2294 		arg->skip++;
2295 		return 0;
2296 	}
2297 
2298 	if (arg->len >= arg->length)
2299 		return 0;
2300 
2301 	arg->len += sprintf(arg->buffer + arg->len,
2302 			    NIP6_SEQFMT " %02x ",
2303 			    NIP6(rt->rt6i_dst.addr),
2304 			    rt->rt6i_dst.plen);
2305 
2306 #ifdef CONFIG_IPV6_SUBTREES
2307 	arg->len += sprintf(arg->buffer + arg->len,
2308 			    NIP6_SEQFMT " %02x ",
2309 			    NIP6(rt->rt6i_src.addr),
2310 			    rt->rt6i_src.plen);
2311 #else
2312 	arg->len += sprintf(arg->buffer + arg->len,
2313 			    "00000000000000000000000000000000 00 ");
2314 #endif
2315 
2316 	if (rt->rt6i_nexthop) {
2317 		arg->len += sprintf(arg->buffer + arg->len,
2318 				    NIP6_SEQFMT,
2319 				    NIP6(*((struct in6_addr *)rt->rt6i_nexthop->primary_key)));
2320 	} else {
2321 		arg->len += sprintf(arg->buffer + arg->len,
2322 				    "00000000000000000000000000000000");
2323 	}
2324 	arg->len += sprintf(arg->buffer + arg->len,
2325 			    " %08x %08x %08x %08x %8s\n",
2326 			    rt->rt6i_metric, atomic_read(&rt->u.dst.__refcnt),
2327 			    rt->u.dst.__use, rt->rt6i_flags,
2328 			    rt->rt6i_dev ? rt->rt6i_dev->name : "");
2329 	return 0;
2330 }
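
/*
 * Userspace sketch of reading the format emitted above from
 * /proc/net/ipv6_route: destination, source and next-hop as 32 bare hex
 * digits each, then metric, refcount, use count, flags and device name.
 * The buffer sizes and output format below are example choices.
 */
#if 0	/* illustrative userspace sketch, not part of the kernel build */
#include <stdio.h>

int main(void)
{
	char dst[33], src[33], gw[33], dev[16];
	unsigned int dplen, splen, metric, refcnt, use, flags;
	FILE *fp = fopen("/proc/net/ipv6_route", "r");

	if (!fp)
		return 1;
	while (fscanf(fp, "%32s %x %32s %x %32s %x %x %x %x %15s",
		      dst, &dplen, src, &splen, gw,
		      &metric, &refcnt, &use, &flags, dev) == 10)
		printf("%s/%u via %s dev %s metric %u flags %08x\n",
		       dst, dplen, gw, dev, metric, flags);
	fclose(fp);
	return 0;
}
#endif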
2331 
2332 static int rt6_proc_info(char *buffer, char **start, off_t offset, int length)
2333 {
2334 	struct rt6_proc_arg arg = {
2335 		.buffer = buffer,
2336 		.offset = offset,
2337 		.length = length,
2338 	};
2339 
2340 	fib6_clean_all(rt6_info_route, 0, &arg);
2341 
2342 	*start = buffer;
2343 	if (offset)
2344 		*start += offset % RT6_INFO_LEN;
2345 
2346 	arg.len -= offset % RT6_INFO_LEN;
2347 
2348 	if (arg.len > length)
2349 		arg.len = length;
2350 	if (arg.len < 0)
2351 		arg.len = 0;
2352 
2353 	return arg.len;
2354 }
2355 
2356 static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2357 {
2358 	seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
2359 		      rt6_stats.fib_nodes, rt6_stats.fib_route_nodes,
2360 		      rt6_stats.fib_rt_alloc, rt6_stats.fib_rt_entries,
2361 		      rt6_stats.fib_rt_cache,
2362 		      atomic_read(&ip6_dst_ops.entries),
2363 		      rt6_stats.fib_discarded_routes);
2364 
2365 	return 0;
2366 }
2367 
2368 static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2369 {
2370 	return single_open(file, rt6_stats_seq_show, NULL);
2371 }
2372 
2373 static const struct file_operations rt6_stats_seq_fops = {
2374 	.owner	 = THIS_MODULE,
2375 	.open	 = rt6_stats_seq_open,
2376 	.read	 = seq_read,
2377 	.llseek	 = seq_lseek,
2378 	.release = single_release,
2379 };
2380 #endif	/* CONFIG_PROC_FS */
2381 
2382 #ifdef CONFIG_SYSCTL
2383 
2384 static int flush_delay;
2385 
2386 static
2387 int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write, struct file * filp,
2388 			      void __user *buffer, size_t *lenp, loff_t *ppos)
2389 {
2390 	if (write) {
2391 		proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
2392 		fib6_run_gc(flush_delay <= 0 ? ~0UL : (unsigned long)flush_delay);
2393 		return 0;
2394 	} else
2395 		return -EINVAL;
2396 }
2397 
2398 ctl_table ipv6_route_table[] = {
2399 	{
2400 		.ctl_name	=	NET_IPV6_ROUTE_FLUSH,
2401 		.procname	=	"flush",
2402 		.data		=	&flush_delay,
2403 		.maxlen		=	sizeof(int),
2404 		.mode		=	0200,
2405 		.proc_handler	=	&ipv6_sysctl_rtcache_flush
2406 	},
2407 	{
2408 		.ctl_name	=	NET_IPV6_ROUTE_GC_THRESH,
2409 		.procname	=	"gc_thresh",
2410 		.data		=	&ip6_dst_ops.gc_thresh,
2411 		.maxlen		=	sizeof(int),
2412 		.mode		=	0644,
2413 		.proc_handler	=	&proc_dointvec,
2414 	},
2415 	{
2416 		.ctl_name	=	NET_IPV6_ROUTE_MAX_SIZE,
2417 		.procname	=	"max_size",
2418 		.data		=	&ip6_rt_max_size,
2419 		.maxlen		=	sizeof(int),
2420 		.mode		=	0644,
2421 		.proc_handler	=	&proc_dointvec,
2422 	},
2423 	{
2424 		.ctl_name	=	NET_IPV6_ROUTE_GC_MIN_INTERVAL,
2425 		.procname	=	"gc_min_interval",
2426 		.data		=	&ip6_rt_gc_min_interval,
2427 		.maxlen		=	sizeof(int),
2428 		.mode		=	0644,
2429 		.proc_handler	=	&proc_dointvec_jiffies,
2430 		.strategy	=	&sysctl_jiffies,
2431 	},
2432 	{
2433 		.ctl_name	=	NET_IPV6_ROUTE_GC_TIMEOUT,
2434 		.procname	=	"gc_timeout",
2435 		.data		=	&ip6_rt_gc_timeout,
2436 		.maxlen		=	sizeof(int),
2437 		.mode		=	0644,
2438 		.proc_handler	=	&proc_dointvec_jiffies,
2439 		.strategy	=	&sysctl_jiffies,
2440 	},
2441 	{
2442 		.ctl_name	=	NET_IPV6_ROUTE_GC_INTERVAL,
2443 		.procname	=	"gc_interval",
2444 		.data		=	&ip6_rt_gc_interval,
2445 		.maxlen		=	sizeof(int),
2446 		.mode		=	0644,
2447 		.proc_handler	=	&proc_dointvec_jiffies,
2448 		.strategy	=	&sysctl_jiffies,
2449 	},
2450 	{
2451 		.ctl_name	=	NET_IPV6_ROUTE_GC_ELASTICITY,
2452 		.procname	=	"gc_elasticity",
2453 		.data		=	&ip6_rt_gc_elasticity,
2454 		.maxlen		=	sizeof(int),
2455 		.mode		=	0644,
2456 		.proc_handler	=	&proc_dointvec_jiffies,
2457 		.strategy	=	&sysctl_jiffies,
2458 	},
2459 	{
2460 		.ctl_name	=	NET_IPV6_ROUTE_MTU_EXPIRES,
2461 		.procname	=	"mtu_expires",
2462 		.data		=	&ip6_rt_mtu_expires,
2463 		.maxlen		=	sizeof(int),
2464 		.mode		=	0644,
2465 		.proc_handler	=	&proc_dointvec_jiffies,
2466 		.strategy	=	&sysctl_jiffies,
2467 	},
2468 	{
2469 		.ctl_name	=	NET_IPV6_ROUTE_MIN_ADVMSS,
2470 		.procname	=	"min_adv_mss",
2471 		.data		=	&ip6_rt_min_advmss,
2472 		.maxlen		=	sizeof(int),
2473 		.mode		=	0644,
2474 		.proc_handler	=	&proc_dointvec_jiffies,
2475 		.strategy	=	&sysctl_jiffies,
2476 	},
2477 	{
2478 		.ctl_name	=	NET_IPV6_ROUTE_GC_MIN_INTERVAL_MS,
2479 		.procname	=	"gc_min_interval_ms",
2480 		.data		=	&ip6_rt_gc_min_interval,
2481 		.maxlen		=	sizeof(int),
2482 		.mode		=	0644,
2483 		.proc_handler	=	&proc_dointvec_ms_jiffies,
2484 		.strategy	=	&sysctl_ms_jiffies,
2485 	},
2486 	{ .ctl_name = 0 }
2487 };
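
/*
 * These knobs surface under /proc/sys/net/ipv6/route/.  A minimal userspace
 * sketch: read gc_thresh, then write a zero to "flush", which goes through
 * ipv6_sysctl_rtcache_flush() above and requests an immediate garbage
 * collection pass (a non-positive delay is treated as "run now").
 */
#if 0	/* illustrative userspace sketch, not part of the kernel build */
#include <stdio.h>

int main(void)
{
	int thresh = 0;
	FILE *fp = fopen("/proc/sys/net/ipv6/route/gc_thresh", "r");

	if (fp) {
		if (fscanf(fp, "%d", &thresh) == 1)
			printf("gc_thresh = %d\n", thresh);
		fclose(fp);
	}

	fp = fopen("/proc/sys/net/ipv6/route/flush", "w");	/* mode 0200, root only */
	if (fp) {
		fprintf(fp, "0\n");	/* delay <= 0 requests an immediate GC pass */
		fclose(fp);
	}
	return 0;
}
#endif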
2488 
2489 #endif
2490 
2491 void __init ip6_route_init(void)
2492 {
2493 #ifdef 	CONFIG_PROC_FS
2494 	struct proc_dir_entry *p;
2495 #endif
2496 	ip6_dst_ops.kmem_cachep =
2497 		kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
2498 				  SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);
2499 	ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops.kmem_cachep;
2500 
2501 	fib6_init();
2502 #ifdef 	CONFIG_PROC_FS
2503 	p = proc_net_create(&init_net, "ipv6_route", 0, rt6_proc_info);
2504 	if (p)
2505 		p->owner = THIS_MODULE;
2506 
2507 	proc_net_fops_create(&init_net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2508 #endif
2509 #ifdef CONFIG_XFRM
2510 	xfrm6_init();
2511 #endif
2512 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2513 	fib6_rules_init();
2514 #endif
2515 
2516 	__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL);
2517 	__rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL);
2518 	__rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL);
2519 }
2520 
2521 void ip6_route_cleanup(void)
2522 {
2523 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2524 	fib6_rules_cleanup();
2525 #endif
2526 #ifdef CONFIG_PROC_FS
2527 	proc_net_remove(&init_net, "ipv6_route");
2528 	proc_net_remove(&init_net, "rt6_stats");
2529 #endif
2530 #ifdef CONFIG_XFRM
2531 	xfrm6_fini();
2532 #endif
2533 	rt6_ifdown(NULL);
2534 	fib6_gc_cleanup();
2535 	kmem_cache_destroy(ip6_dst_ops.kmem_cachep);
2536 }
2537